From: Yuka Kawajiri yukx00@gmail.com
[ Upstream commit 512eb73cfd1208898cf10cb06094e0ee0bb53b58 ]
Add touchscreen info for RWC NANOTE P8 (AY07J) 2-in-1.
Signed-off-by: Yuka Kawajiri yukx00@gmail.com Link: https://lore.kernel.org/r/20220111154019.4599-1-yukx00@gmail.com Reviewed-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/platform/x86/touchscreen_dmi.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+)
diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 59b7e90cd5875..ab6a9369649db 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -756,6 +756,21 @@ static const struct ts_dmi_data predia_basic_data = { .properties = predia_basic_props, };
+static const struct property_entry rwc_nanote_p8_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-y", 46), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1728), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1140), + PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-rwc-nanote-p8.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + { } +}; + +static const struct ts_dmi_data rwc_nanote_p8_data = { + .acpi_name = "MSSL1680:00", + .properties = rwc_nanote_p8_props, +}; + static const struct property_entry schneider_sct101ctm_props[] = { PROPERTY_ENTRY_U32("touchscreen-size-x", 1715), PROPERTY_ENTRY_U32("touchscreen-size-y", 1140), @@ -1326,6 +1341,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "0E57"), }, }, + { + /* RWC NANOTE P8 */ + .driver_data = (void *)&rwc_nanote_p8_data, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Default string"), + DMI_MATCH(DMI_PRODUCT_NAME, "AY07J"), + DMI_MATCH(DMI_PRODUCT_SKU, "0001") + }, + }, { /* Schneider SCT101CTM */ .driver_data = (void *)&schneider_sct101ctm_data,
From: Srinivas Pandruvada srinivas.pandruvada@linux.intel.com
[ Upstream commit 17da2d5f93692086dd096a975225ffd5622d0bf8 ]
As reported:
[ 256.104522] ====================================================== [ 256.113783] WARNING: possible circular locking dependency detected [ 256.120093] 5.16.0-rc6-yocto-standard+ #99 Not tainted [ 256.125362] ------------------------------------------------------ [ 256.131673] intel-speed-sel/844 is trying to acquire lock: [ 256.137290] ffffffffc036f0d0 (punit_misc_dev_lock){+.+.}-{3:3}, at: isst_if_open+0x18/0x90 [isst_if_common] [ 256.147171] [ 256.147171] but task is already holding lock: [ 256.153135] ffffffff8ee7cb50 (misc_mtx){+.+.}-{3:3}, at: misc_open+0x2a/0x170 [ 256.160407] [ 256.160407] which lock already depends on the new lock. [ 256.160407] [ 256.168712] [ 256.168712] the existing dependency chain (in reverse order) is: [ 256.176327] [ 256.176327] -> #1 (misc_mtx){+.+.}-{3:3}: [ 256.181946] lock_acquire+0x1e6/0x330 [ 256.186265] __mutex_lock+0x9b/0x9b0 [ 256.190497] mutex_lock_nested+0x1b/0x20 [ 256.195075] misc_register+0x32/0x1a0 [ 256.199390] isst_if_cdev_register+0x65/0x180 [isst_if_common] [ 256.205878] isst_if_probe+0x144/0x16e [isst_if_mmio] ... [ 256.241976] [ 256.241976] -> #0 (punit_misc_dev_lock){+.+.}-{3:3}: [ 256.248552] validate_chain+0xbc6/0x1750 [ 256.253131] __lock_acquire+0x88c/0xc10 [ 256.257618] lock_acquire+0x1e6/0x330 [ 256.261933] __mutex_lock+0x9b/0x9b0 [ 256.266165] mutex_lock_nested+0x1b/0x20 [ 256.270739] isst_if_open+0x18/0x90 [isst_if_common] [ 256.276356] misc_open+0x100/0x170 [ 256.280409] chrdev_open+0xa5/0x1e0 ...
The call sequence suggested that misc_device /dev file can be opened before misc device is yet to be registered, which is done only once.
Here punit_misc_dev_lock was used as common lock, to protect the registration by multiple ISST HW drivers, one time setup, prevent duplicate registry of misc device and prevent load/unload when device is open.
We can split into locks: - One which just prevent duplicate call to misc_register() and one time setup. Also never call again if the misc_register() failed or required one time setup is failed. This lock is not shared with any misc device callbacks.
- The other lock protects registry, load and unload of HW drivers.
Sequence in isst_if_cdev_register() - Register callbacks under punit_misc_dev_open_lock - Call isst_misc_reg() which registers misc_device on the first registry which is under punit_misc_dev_reg_lock, which is not shared with callbacks.
Sequence in isst_if_cdev_unregister Just opposite of isst_if_cdev_register
Reported-and-tested-by: Liwei Song liwei.song@windriver.com Signed-off-by: Srinivas Pandruvada srinivas.pandruvada@linux.intel.com Link: https://lore.kernel.org/r/20220112022521.54669-1-srinivas.pandruvada@linux.i... Reviewed-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- .../intel_speed_select_if/isst_if_common.c | 97 ++++++++++++------- 1 file changed, 63 insertions(+), 34 deletions(-)
diff --git a/drivers/platform/x86/intel_speed_select_if/isst_if_common.c b/drivers/platform/x86/intel_speed_select_if/isst_if_common.c index 0c2aa22c7a12e..407afafc7e83f 100644 --- a/drivers/platform/x86/intel_speed_select_if/isst_if_common.c +++ b/drivers/platform/x86/intel_speed_select_if/isst_if_common.c @@ -532,7 +532,10 @@ static long isst_if_def_ioctl(struct file *file, unsigned int cmd, return ret; }
-static DEFINE_MUTEX(punit_misc_dev_lock); +/* Lock to prevent module registration when already opened by user space */ +static DEFINE_MUTEX(punit_misc_dev_open_lock); +/* Lock to allow one share misc device for all ISST interace */ +static DEFINE_MUTEX(punit_misc_dev_reg_lock); static int misc_usage_count; static int misc_device_ret; static int misc_device_open; @@ -542,7 +545,7 @@ static int isst_if_open(struct inode *inode, struct file *file) int i, ret = 0;
/* Fail open, if a module is going away */ - mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); for (i = 0; i < ISST_IF_DEV_MAX; ++i) { struct isst_if_cmd_cb *cb = &punit_callbacks[i];
@@ -564,7 +567,7 @@ static int isst_if_open(struct inode *inode, struct file *file) } else { misc_device_open++; } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock);
return ret; } @@ -573,7 +576,7 @@ static int isst_if_relase(struct inode *inode, struct file *f) { int i;
- mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); misc_device_open--; for (i = 0; i < ISST_IF_DEV_MAX; ++i) { struct isst_if_cmd_cb *cb = &punit_callbacks[i]; @@ -581,7 +584,7 @@ static int isst_if_relase(struct inode *inode, struct file *f) if (cb->registered) module_put(cb->owner); } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock);
return 0; } @@ -598,6 +601,43 @@ static struct miscdevice isst_if_char_driver = { .fops = &isst_if_char_driver_ops, };
+static int isst_misc_reg(void) +{ + mutex_lock(&punit_misc_dev_reg_lock); + if (misc_device_ret) + goto unlock_exit; + + if (!misc_usage_count) { + misc_device_ret = isst_if_cpu_info_init(); + if (misc_device_ret) + goto unlock_exit; + + misc_device_ret = misc_register(&isst_if_char_driver); + if (misc_device_ret) { + isst_if_cpu_info_exit(); + goto unlock_exit; + } + } + misc_usage_count++; + +unlock_exit: + mutex_unlock(&punit_misc_dev_reg_lock); + + return misc_device_ret; +} + +static void isst_misc_unreg(void) +{ + mutex_lock(&punit_misc_dev_reg_lock); + if (misc_usage_count) + misc_usage_count--; + if (!misc_usage_count && !misc_device_ret) { + misc_deregister(&isst_if_char_driver); + isst_if_cpu_info_exit(); + } + mutex_unlock(&punit_misc_dev_reg_lock); +} + /** * isst_if_cdev_register() - Register callback for IOCTL * @device_type: The device type this callback handling. @@ -615,38 +655,31 @@ static struct miscdevice isst_if_char_driver = { */ int isst_if_cdev_register(int device_type, struct isst_if_cmd_cb *cb) { - if (misc_device_ret) - return misc_device_ret; + int ret;
if (device_type >= ISST_IF_DEV_MAX) return -EINVAL;
- mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); + /* Device is already open, we don't want to add new callbacks */ if (misc_device_open) { - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); return -EAGAIN; } - if (!misc_usage_count) { - int ret; - - misc_device_ret = misc_register(&isst_if_char_driver); - if (misc_device_ret) - goto unlock_exit; - - ret = isst_if_cpu_info_init(); - if (ret) { - misc_deregister(&isst_if_char_driver); - misc_device_ret = ret; - goto unlock_exit; - } - } memcpy(&punit_callbacks[device_type], cb, sizeof(*cb)); punit_callbacks[device_type].registered = 1; - misc_usage_count++; -unlock_exit: - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock);
- return misc_device_ret; + ret = isst_misc_reg(); + if (ret) { + /* + * No need of mutex as the misc device register failed + * as no one can open device yet. Hence no contention. + */ + punit_callbacks[device_type].registered = 0; + return ret; + } + return 0; } EXPORT_SYMBOL_GPL(isst_if_cdev_register);
@@ -661,16 +694,12 @@ EXPORT_SYMBOL_GPL(isst_if_cdev_register); */ void isst_if_cdev_unregister(int device_type) { - mutex_lock(&punit_misc_dev_lock); - misc_usage_count--; + isst_misc_unreg(); + mutex_lock(&punit_misc_dev_open_lock); punit_callbacks[device_type].registered = 0; if (device_type == ISST_IF_DEV_MBOX) isst_delete_hash(); - if (!misc_usage_count && !misc_device_ret) { - misc_deregister(&isst_if_char_driver); - isst_if_cpu_info_exit(); - } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); } EXPORT_SYMBOL_GPL(isst_if_cdev_unregister);
From: Nícolas F. R. A. Prado nfraprado@collabora.com
[ Upstream commit f034cc1301e7d83d4ec428dd6b8ffb57ca446efb ]
The timeout setting for the rtc kselftest is currently 90 seconds. This setting is used by the kselftest runner to stop running a test if it takes longer than the assigned value.
However, two of the test cases inside rtc set alarms. These alarms are set to the next beginning of the minute, so each of these test cases may take up to, in the worst case, 60 seconds.
In order to allow for all test cases in rtc to run, even in the worst case, when using the kselftest runner, the timeout value should be increased to at least 120. Set it to 180, so there's some additional slack.
Correct operation can be tested by running the following command right after the start of a minute (low second count), and checking that all test cases run:
./run_kselftest.sh -c rtc
Signed-off-by: Nícolas F. R. A. Prado nfraprado@collabora.com Acked-by: Alexandre Belloni alexandre.belloni@bootlin.com Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/rtc/settings | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/rtc/settings b/tools/testing/selftests/rtc/settings index ba4d85f74cd6b..a953c96aa16e1 100644 --- a/tools/testing/selftests/rtc/settings +++ b/tools/testing/selftests/rtc/settings @@ -1 +1 @@ -timeout=90 +timeout=180
From: Li Zhijian lizhijian@cn.fujitsu.com
[ Upstream commit 92d25637a3a45904292c93f1863c6bbda4e3e38f ]
We have some many cases that will create child process as well, such as pidfd_wait. Previously, we will signal/kill the parent process when it is time out, but this signal will not be sent to its child process. In such case, if child process doesn't terminate itself, ksefltest framework will hang forever.
Here we group all its child processes so that kill() can signal all of them in timeout.
Fixed change log: Shuah Khan skhan@linuxfoundation.org
Suggested-by: yang xu xuyang2018.jy@cn.fujitsu.com Signed-off-by: Li Zhijian lizhijian@cn.fujitsu.com Acked-by: Christian Brauner christian.brauner@ubuntu.com Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/kselftest_harness.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 5ecb9718e1616..3e7b2e521cde4 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -871,7 +871,8 @@ static void __timeout_handler(int sig, siginfo_t *info, void *ucontext) }
t->timed_out = true; - kill(t->pid, SIGKILL); + // signal process group + kill(-(t->pid), SIGKILL); }
void __wait_for_test(struct __test_metadata *t) @@ -981,6 +982,7 @@ void __run_test(struct __fixture_metadata *f, ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); t->passed = 0; } else if (t->pid == 0) { + setpgrp(); t->fn(t, variant); if (t->skip) _exit(255);
From: Miquel Raynal miquel.raynal@bootlin.com
[ Upstream commit e5ce576d45bf72fd0e3dc37eff897bfcc488f6a9 ]
Upon error the ieee802154_xmit_complete() helper is not called. Only ieee802154_wake_queue() is called manually. In the Tx case we then leak the skb structure.
Free the skb structure upon error before returning when appropriate.
As the 'is_tx = 0' cannot be moved in the complete handler because of a possible race between the delay in switching to STATE_RX_AACK_ON and a new interrupt, we introduce an intermediate 'was_tx' boolean just for this purpose.
There is no Fixes tag applying here, many changes have been made on this area and the issue kind of always existed.
Suggested-by: Alexander Aring alex.aring@gmail.com Signed-off-by: Miquel Raynal miquel.raynal@bootlin.com Acked-by: Alexander Aring aahringo@redhat.com Link: https://lore.kernel.org/r/20220125121426.848337-4-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt stefan@datenfreihafen.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ieee802154/at86rf230.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 7d67f41387f55..4f5ef8a9a9a87 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -100,6 +100,7 @@ struct at86rf230_local { unsigned long cal_timeout; bool is_tx; bool is_tx_from_off; + bool was_tx; u8 tx_retry; struct sk_buff *tx_skb; struct at86rf230_state_change tx; @@ -343,7 +344,11 @@ at86rf230_async_error_recover_complete(void *context) if (ctx->free) kfree(ctx);
- ieee802154_wake_queue(lp->hw); + if (lp->was_tx) { + lp->was_tx = 0; + dev_kfree_skb_any(lp->tx_skb); + ieee802154_wake_queue(lp->hw); + } }
static void @@ -352,7 +357,11 @@ at86rf230_async_error_recover(void *context) struct at86rf230_state_change *ctx = context; struct at86rf230_local *lp = ctx->lp;
- lp->is_tx = 0; + if (lp->is_tx) { + lp->was_tx = 1; + lp->is_tx = 0; + } + at86rf230_async_state_change(lp, ctx, STATE_RX_AACK_ON, at86rf230_async_error_recover_complete); }
From: Yang Xu xuyang2018.jy@fujitsu.com
[ Upstream commit fc4eb486a59d70bd35cf1209f0e68c2d8b979193 ]
Since commit 43209ea2d17a ("zram: remove max_comp_streams internals"), zram has switched to per-cpu streams. Even kernel still keep this interface for some reasons, but writing to max_comp_stream doesn't take any effect. So skip it on newer kernel ie 4.7.
The code that comparing kernel version is from xfstests testsuite ext4/053.
Signed-off-by: Yang Xu xuyang2018.jy@fujitsu.com Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/zram/zram_lib.sh | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+)
diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh index 6f872f266fd11..f47fc0f27e99e 100755 --- a/tools/testing/selftests/zram/zram_lib.sh +++ b/tools/testing/selftests/zram/zram_lib.sh @@ -11,6 +11,9 @@ dev_mounted=-1
# Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +kernel_version=`uname -r | cut -d'.' -f1,2` +kernel_major=${kernel_version%.*} +kernel_minor=${kernel_version#*.}
trap INT
@@ -25,6 +28,20 @@ check_prereqs() fi }
+kernel_gte() +{ + major=${1%.*} + minor=${1#*.} + + if [ $kernel_major -gt $major ]; then + return 0 + elif [[ $kernel_major -eq $major && $kernel_minor -ge $minor ]]; then + return 0 + fi + + return 1 +} + zram_cleanup() { echo "zram cleanup" @@ -86,6 +103,13 @@ zram_max_streams() { echo "set max_comp_streams to zram device(s)"
+ kernel_gte 4.7 + if [ $? -eq 0 ]; then + echo "The device attribute max_comp_streams was"\ + "deprecated in 4.7" + return 0 + fi + local i=0 for max_s in $zram_max_streams; do local sys_path="/sys/block/zram${i}/max_comp_streams"
From: Yang Xu xuyang2018.jy@fujitsu.com
[ Upstream commit d18da7ec3719559d6e74937266d0416e6c7e0b31 ]
zram01 uses `free -m` to measure zram memory usage. The results are no sense because they are polluted by all running processes on the system.
We Should only calculate the free memory delta for the current process. So use the third field of /sys/block/zram<id>/mm_stat to measure memory usage instead. The file is available since kernel 4.1.
orig_data_size(first): uncompressed size of data stored in this disk. compr_data_size(second): compressed size of data stored in this disk mem_used_total(third): the amount of memory allocated for this disk
Also remove useless zram cleanup call in zram_fill_fs and so we don't need to cleanup zram twice if fails.
Signed-off-by: Yang Xu xuyang2018.jy@fujitsu.com Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/zram/zram01.sh | 30 +++++++------------------- 1 file changed, 8 insertions(+), 22 deletions(-)
diff --git a/tools/testing/selftests/zram/zram01.sh b/tools/testing/selftests/zram/zram01.sh index 114863d9fb876..e9e9eb777e2c7 100755 --- a/tools/testing/selftests/zram/zram01.sh +++ b/tools/testing/selftests/zram/zram01.sh @@ -33,8 +33,6 @@ zram_algs="lzo"
zram_fill_fs() { - local mem_free0=$(free -m | awk 'NR==2 {print $4}') - for i in $(seq 0 $(($dev_num - 1))); do echo "fill zram$i..." local b=0 @@ -45,29 +43,17 @@ zram_fill_fs() b=$(($b + 1)) done echo "zram$i can be filled with '$b' KB" - done
- local mem_free1=$(free -m | awk 'NR==2 {print $4}') - local used_mem=$(($mem_free0 - $mem_free1)) + local mem_used_total=`awk '{print $3}' "/sys/block/zram$i/mm_stat"` + local v=$((100 * 1024 * $b / $mem_used_total)) + if [ "$v" -lt 100 ]; then + echo "FAIL compression ratio: 0.$v:1" + ERR_CODE=-1 + return + fi
- local total_size=0 - for sm in $zram_sizes; do - local s=$(echo $sm | sed 's/M//') - total_size=$(($total_size + $s)) + echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK" done - - echo "zram used ${used_mem}M, zram disk sizes ${total_size}M" - - local v=$((100 * $total_size / $used_mem)) - - if [ "$v" -lt 100 ]; then - echo "FAIL compression ratio: 0.$v:1" - ERR_CODE=-1 - zram_cleanup - return - fi - - echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK" }
check_prereqs
From: Yang Xu xuyang2018.jy@fujitsu.com
[ Upstream commit 01dabed20573804750af5c7bf8d1598a6bf7bf6e ]
If zram-generator package is installed and works, then we can not remove zram module because zram swap is being used. This case needs a clean zram environment, change this test by using hot_add/hot_remove interface. So even zram device is being used, we still can add zram device and remove them in cleanup.
The two interface was introduced since kernel commit 6566d1a32bf7("zram: add dynamic device add/remove functionality") in v4.2-rc1. If kernel supports these two interface, we use hot_add/hot_remove to slove this problem, if not, just check whether zram is being used or built in, then skip it on old kernel.
Signed-off-by: Yang Xu xuyang2018.jy@fujitsu.com Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/zram/zram.sh | 15 +--- tools/testing/selftests/zram/zram01.sh | 3 +- tools/testing/selftests/zram/zram02.sh | 1 - tools/testing/selftests/zram/zram_lib.sh | 110 +++++++++++++---------- 4 files changed, 66 insertions(+), 63 deletions(-)
diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh index 232e958ec4547..b0b91d9b0dc21 100755 --- a/tools/testing/selftests/zram/zram.sh +++ b/tools/testing/selftests/zram/zram.sh @@ -2,9 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 TCID="zram.sh"
-# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - . ./zram_lib.sh
run_zram () { @@ -18,14 +15,4 @@ echo ""
check_prereqs
-# check zram module exists -MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko -if [ -f $MODULE_PATH ]; then - run_zram -elif [ -b /dev/zram0 ]; then - run_zram -else - echo "$TCID : No zram.ko module or /dev/zram0 device file not found" - echo "$TCID : CONFIG_ZRAM is not set" - exit $ksft_skip -fi +run_zram diff --git a/tools/testing/selftests/zram/zram01.sh b/tools/testing/selftests/zram/zram01.sh index e9e9eb777e2c7..8f4affe34f3e4 100755 --- a/tools/testing/selftests/zram/zram01.sh +++ b/tools/testing/selftests/zram/zram01.sh @@ -33,7 +33,7 @@ zram_algs="lzo"
zram_fill_fs() { - for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo "fill zram$i..." local b=0 while [ true ]; do @@ -67,7 +67,6 @@ zram_mount
zram_fill_fs zram_cleanup -zram_unload
if [ $ERR_CODE -ne 0 ]; then echo "$TCID : [FAIL]" diff --git a/tools/testing/selftests/zram/zram02.sh b/tools/testing/selftests/zram/zram02.sh index e83b404807c09..2418b0c4ed136 100755 --- a/tools/testing/selftests/zram/zram02.sh +++ b/tools/testing/selftests/zram/zram02.sh @@ -36,7 +36,6 @@ zram_set_memlimit zram_makeswap zram_swapoff zram_cleanup -zram_unload
if [ $ERR_CODE -ne 0 ]; then echo "$TCID : [FAIL]" diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh index f47fc0f27e99e..21ec1966de76c 100755 --- a/tools/testing/selftests/zram/zram_lib.sh +++ b/tools/testing/selftests/zram/zram_lib.sh @@ -5,10 +5,12 @@ # Author: Alexey Kodanev alexey.kodanev@oracle.com # Modified: Naresh Kamboju naresh.kamboju@linaro.org
-MODULE=0 dev_makeswap=-1 dev_mounted=-1 - +dev_start=0 +dev_end=-1 +module_load=-1 +sys_control=-1 # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 kernel_version=`uname -r | cut -d'.' -f1,2` @@ -46,57 +48,72 @@ zram_cleanup() { echo "zram cleanup" local i= - for i in $(seq 0 $dev_makeswap); do + for i in $(seq $dev_start $dev_makeswap); do swapoff /dev/zram$i done
- for i in $(seq 0 $dev_mounted); do + for i in $(seq $dev_start $dev_mounted); do umount /dev/zram$i done
- for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo 1 > /sys/block/zram${i}/reset rm -rf zram$i done
-} + if [ $sys_control -eq 1 ]; then + for i in $(seq $dev_start $dev_end); do + echo $i > /sys/class/zram-control/hot_remove + done + fi
-zram_unload() -{ - if [ $MODULE -ne 0 ] ; then - echo "zram rmmod zram" + if [ $module_load -eq 1 ]; then rmmod zram > /dev/null 2>&1 fi }
zram_load() { - # check zram module exists - MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko - if [ -f $MODULE_PATH ]; then - MODULE=1 - echo "create '$dev_num' zram device(s)" - modprobe zram num_devices=$dev_num - if [ $? -ne 0 ]; then - echo "failed to insert zram module" - exit 1 - fi - - dev_num_created=$(ls /dev/zram* | wc -w) + echo "create '$dev_num' zram device(s)" + + # zram module loaded, new kernel + if [ -d "/sys/class/zram-control" ]; then + echo "zram modules already loaded, kernel supports" \ + "zram-control interface" + dev_start=$(ls /dev/zram* | wc -w) + dev_end=$(($dev_start + $dev_num - 1)) + sys_control=1 + + for i in $(seq $dev_start $dev_end); do + cat /sys/class/zram-control/hot_add > /dev/null + done + + echo "all zram devices (/dev/zram$dev_start~$dev_end" \ + "successfully created" + return 0 + fi
- if [ "$dev_num_created" -ne "$dev_num" ]; then - echo "unexpected num of devices: $dev_num_created" - ERR_CODE=-1 + # detect old kernel or built-in + modprobe zram num_devices=$dev_num + if [ ! -d "/sys/class/zram-control" ]; then + if grep -q '^zram' /proc/modules; then + rmmod zram > /dev/null 2>&1 + if [ $? -ne 0 ]; then + echo "zram module is being used on old kernel" \ + "without zram-control interface" + exit $ksft_skip + fi else - echo "zram load module successful" + echo "test needs CONFIG_ZRAM=m on old kernel without" \ + "zram-control interface" + exit $ksft_skip fi - elif [ -b /dev/zram0 ]; then - echo "/dev/zram0 device file found: OK" - else - echo "ERROR: No zram.ko module or no /dev/zram0 device found" - echo "$TCID : CONFIG_ZRAM is not set" - exit 1 + modprobe zram num_devices=$dev_num fi + + module_load=1 + dev_end=$(($dev_num - 1)) + echo "all zram devices (/dev/zram0~$dev_end) successfully created" }
zram_max_streams() @@ -110,7 +127,7 @@ zram_max_streams() return 0 fi
- local i=0 + local i=$dev_start for max_s in $zram_max_streams; do local sys_path="/sys/block/zram${i}/max_comp_streams" echo $max_s > $sys_path || \ @@ -122,7 +139,7 @@ zram_max_streams() echo "FAIL can't set max_streams '$max_s', get $max_stream"
i=$(($i + 1)) - echo "$sys_path = '$max_streams' ($i/$dev_num)" + echo "$sys_path = '$max_streams'" done
echo "zram max streams: OK" @@ -132,15 +149,16 @@ zram_compress_alg() { echo "test that we can set compression algorithm"
- local algs=$(cat /sys/block/zram0/comp_algorithm) + local i=$dev_start + local algs=$(cat /sys/block/zram${i}/comp_algorithm) echo "supported algs: $algs" - local i=0 + for alg in $zram_algs; do local sys_path="/sys/block/zram${i}/comp_algorithm" echo "$alg" > $sys_path || \ echo "FAIL can't set '$alg' to $sys_path" i=$(($i + 1)) - echo "$sys_path = '$alg' ($i/$dev_num)" + echo "$sys_path = '$alg'" done
echo "zram set compression algorithm: OK" @@ -149,14 +167,14 @@ zram_compress_alg() zram_set_disksizes() { echo "set disk size to zram device(s)" - local i=0 + local i=$dev_start for ds in $zram_sizes; do local sys_path="/sys/block/zram${i}/disksize" echo "$ds" > $sys_path || \ echo "FAIL can't set '$ds' to $sys_path"
i=$(($i + 1)) - echo "$sys_path = '$ds' ($i/$dev_num)" + echo "$sys_path = '$ds'" done
echo "zram set disksizes: OK" @@ -166,14 +184,14 @@ zram_set_memlimit() { echo "set memory limit to zram device(s)"
- local i=0 + local i=$dev_start for ds in $zram_mem_limits; do local sys_path="/sys/block/zram${i}/mem_limit" echo "$ds" > $sys_path || \ echo "FAIL can't set '$ds' to $sys_path"
i=$(($i + 1)) - echo "$sys_path = '$ds' ($i/$dev_num)" + echo "$sys_path = '$ds'" done
echo "zram set memory limit: OK" @@ -182,8 +200,8 @@ zram_set_memlimit() zram_makeswap() { echo "make swap with zram device(s)" - local i=0 - for i in $(seq 0 $(($dev_num - 1))); do + local i=$dev_start + for i in $(seq $dev_start $dev_end); do mkswap /dev/zram$i > err.log 2>&1 if [ $? -ne 0 ]; then cat err.log @@ -206,7 +224,7 @@ zram_makeswap() zram_swapoff() { local i= - for i in $(seq 0 $dev_makeswap); do + for i in $(seq $dev_start $dev_end); do swapoff /dev/zram$i > err.log 2>&1 if [ $? -ne 0 ]; then cat err.log @@ -220,7 +238,7 @@ zram_swapoff()
zram_makefs() { - local i=0 + local i=$dev_start for fs in $zram_filesystems; do # if requested fs not supported default it to ext2 which mkfs.$fs > /dev/null 2>&1 || fs=ext2 @@ -239,7 +257,7 @@ zram_makefs() zram_mount() { local i=0 - for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo "mount /dev/zram$i" mkdir zram$i mount /dev/zram$i zram$i > /dev/null || \
From: Cristian Marussi cristian.marussi@arm.com
[ Upstream commit e051cdf655fa016692008a446a060eff06222bb5 ]
In E_func() macro, on error, print also errno in order to aid debugging.
Cc: Aleksa Sarai cyphar@cyphar.com Signed-off-by: Cristian Marussi cristian.marussi@arm.com Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/openat2/helpers.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/tools/testing/selftests/openat2/helpers.h b/tools/testing/selftests/openat2/helpers.h index a6ea27344db2d..ad5d0ba5b6ce9 100644 --- a/tools/testing/selftests/openat2/helpers.h +++ b/tools/testing/selftests/openat2/helpers.h @@ -62,11 +62,12 @@ bool needs_openat2(const struct open_how *how); (similar to chroot(2)). */ #endif /* RESOLVE_IN_ROOT */
-#define E_func(func, ...) \ - do { \ - if (func(__VA_ARGS__) < 0) \ - ksft_exit_fail_msg("%s:%d %s failed\n", \ - __FILE__, __LINE__, #func);\ +#define E_func(func, ...) \ + do { \ + errno = 0; \ + if (func(__VA_ARGS__) < 0) \ + ksft_exit_fail_msg("%s:%d %s failed - errno:%d\n", \ + __FILE__, __LINE__, #func, errno); \ } while (0)
#define E_asprintf(...) E_func(asprintf, __VA_ARGS__)
From: Cristian Marussi cristian.marussi@arm.com
[ Upstream commit ea3396725aa143dd42fe388cb67e44c90d2fb719 ]
Add a dependency on header helpers.h to the main target; while at that add to helpers.h also a missing include for bool types.
Cc: Aleksa Sarai cyphar@cyphar.com Signed-off-by: Cristian Marussi cristian.marussi@arm.com Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/openat2/Makefile | 2 +- tools/testing/selftests/openat2/helpers.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile index 4b93b1417b862..843ba56d8e49e 100644 --- a/tools/testing/selftests/openat2/Makefile +++ b/tools/testing/selftests/openat2/Makefile @@ -5,4 +5,4 @@ TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test
include ../lib.mk
-$(TEST_GEN_PROGS): helpers.c +$(TEST_GEN_PROGS): helpers.c helpers.h diff --git a/tools/testing/selftests/openat2/helpers.h b/tools/testing/selftests/openat2/helpers.h index ad5d0ba5b6ce9..7056340b9339e 100644 --- a/tools/testing/selftests/openat2/helpers.h +++ b/tools/testing/selftests/openat2/helpers.h @@ -9,6 +9,7 @@
#define _GNU_SOURCE #include <stdint.h> +#include <stdbool.h> #include <errno.h> #include <linux/types.h> #include "../kselftest.h"
From: Cristian Marussi cristian.marussi@arm.com
[ Upstream commit ac9e0a250bb155078601a5b999aab05f2a04d1ab ]
Skip testcases that fail since the requested valid flags combination is not supported by the underlying filesystem.
Cc: Aleksa Sarai cyphar@cyphar.com Signed-off-by: Cristian Marussi cristian.marussi@arm.com Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/openat2/openat2_test.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c index b386367c606b1..453152b58e7f0 100644 --- a/tools/testing/selftests/openat2/openat2_test.c +++ b/tools/testing/selftests/openat2/openat2_test.c @@ -244,6 +244,16 @@ void test_openat2_flags(void) unlink(path);
fd = sys_openat2(AT_FDCWD, path, &test->how); + if (fd < 0 && fd == -EOPNOTSUPP) { + /* + * Skip the testcase if it failed because not supported + * by FS. (e.g. a valid O_TMPFILE combination on NFS) + */ + ksft_test_result_skip("openat2 with %s fails with %d (%s)\n", + test->name, fd, strerror(-fd)); + goto next; + } + if (test->err >= 0) failed = (fd < 0); else @@ -288,7 +298,7 @@ void test_openat2_flags(void) else resultfn("openat2 with %s fails with %d (%s)\n", test->name, test->err, strerror(-test->err)); - +next: free(fdpath); fflush(stdout); }
From: Cristian Marussi cristian.marussi@arm.com
[ Upstream commit dae1d8ac31896988e7313384c0370176a75e9b45 ]
Report mincore.check_file_mmap as SKIP instead of FAIL if the underlying filesystem lacks support of O_TMPFILE or fallocate since such failures are not really related to mincore functionality.
Cc: Ricardo Cañuelo ricardo.canuelo@collabora.com Signed-off-by: Cristian Marussi cristian.marussi@arm.com Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- .../selftests/mincore/mincore_selftest.c | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-)
diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c index 5a1e85ff5d32a..2cf6f2f277ab8 100644 --- a/tools/testing/selftests/mincore/mincore_selftest.c +++ b/tools/testing/selftests/mincore/mincore_selftest.c @@ -208,15 +208,21 @@ TEST(check_file_mmap)
errno = 0; fd = open(".", O_TMPFILE | O_RDWR, 0600); - ASSERT_NE(-1, fd) { - TH_LOG("Can't create temporary file: %s", - strerror(errno)); + if (fd < 0) { + ASSERT_EQ(errno, EOPNOTSUPP) { + TH_LOG("Can't create temporary file: %s", + strerror(errno)); + } + SKIP(goto out_free, "O_TMPFILE not supported by filesystem."); } errno = 0; retval = fallocate(fd, 0, 0, FILE_SIZE); - ASSERT_EQ(0, retval) { - TH_LOG("Error allocating space for the temporary file: %s", - strerror(errno)); + if (retval) { + ASSERT_EQ(errno, EOPNOTSUPP) { + TH_LOG("Error allocating space for the temporary file: %s", + strerror(errno)); + } + SKIP(goto out_close, "fallocate not supported by filesystem."); }
/* @@ -272,7 +278,9 @@ TEST(check_file_mmap) }
munmap(addr, FILE_SIZE); +out_close: close(fd); +out_free: free(vec); }
From: Duoming Zhou duoming@zju.edu.cn
[ Upstream commit 4e0f718daf97d47cf7dec122da1be970f145c809 ]
The previous commit 1ade48d0c27d ("ax25: NPD bug when detaching AX25 device") introduce lock_sock() into ax25_kill_by_device to prevent NPD bug. But the concurrency NPD or UAF bug will occur, when lock_sock() or release_sock() dereferences the ax25_cb->sock.
The NULL pointer dereference bug can be shown as below:
ax25_kill_by_device() | ax25_release() | ax25_destroy_socket() | ax25_cb_del() ... | ... | ax25->sk=NULL; lock_sock(s->sk); //(1) | s->ax25_dev = NULL; | ... release_sock(s->sk); //(2) | ... |
The root cause is that the sock is set to null before dereference site (1) or (2). Therefore, this patch extracts the ax25_cb->sock in advance, and uses ax25_list_lock to protect it, which can synchronize with ax25_cb_del() and ensure the value of sock is not null before dereference sites.
The concurrency UAF bug can be shown as below:
ax25_kill_by_device() | ax25_release() | ax25_destroy_socket() ... | ... | sock_put(sk); //FREE lock_sock(s->sk); //(1) | s->ax25_dev = NULL; | ... release_sock(s->sk); //(2) | ... |
The root cause is that the sock is released before dereference site (1) or (2). Therefore, this patch uses sock_hold() to increase the refcount of sock and uses ax25_list_lock to protect it, which can synchronize with ax25_cb_del() in ax25_destroy_socket() and ensure the sock wil not be released before dereference sites.
Signed-off-by: Duoming Zhou duoming@zju.edu.cn Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- net/ax25/af_ax25.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 5e84dce5ff7ae..23bd26057a828 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -77,6 +77,7 @@ static void ax25_kill_by_device(struct net_device *dev) { ax25_dev *ax25_dev; ax25_cb *s; + struct sock *sk;
if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) return; @@ -85,13 +86,15 @@ static void ax25_kill_by_device(struct net_device *dev) again: ax25_for_each(s, &ax25_list) { if (s->ax25_dev == ax25_dev) { + sk = s->sk; + sock_hold(sk); spin_unlock_bh(&ax25_list_lock); - lock_sock(s->sk); + lock_sock(sk); s->ax25_dev = NULL; - release_sock(s->sk); + release_sock(sk); ax25_disconnect(s, ENETUNREACH); spin_lock_bh(&ax25_list_lock); - + sock_put(sk); /* The entry could have been deleted from the * list meanwhile and thus the next pointer is * no longer valid. Play it safe and restart
From: "Darrick J. Wong" djwong@kernel.org
[ Upstream commit 2719c7160dcfaae1f73a1c0c210ad3281c19022e ]
If we fail to synchronize the filesystem while preparing to freeze the fs, abort the freeze.
Signed-off-by: Darrick J. Wong djwong@kernel.org Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Christoph Hellwig hch@lst.de Acked-by: Christian Brauner brauner@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/super.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/fs/super.c b/fs/super.c index 20f1707807bbd..bae3fe80f852e 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1667,11 +1667,9 @@ static void lockdep_sb_freeze_acquire(struct super_block *sb) percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_); }
-static void sb_freeze_unlock(struct super_block *sb) +static void sb_freeze_unlock(struct super_block *sb, int level) { - int level; - - for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--) + for (level--; level >= 0; level--) percpu_up_write(sb->s_writers.rw_sem + level); }
@@ -1742,7 +1740,14 @@ int freeze_super(struct super_block *sb) sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
/* All writers are done so after syncing there won't be dirty data */ - sync_filesystem(sb); + ret = sync_filesystem(sb); + if (ret) { + sb->s_writers.frozen = SB_UNFROZEN; + sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT); + wake_up(&sb->s_writers.wait_unfrozen); + deactivate_locked_super(sb); + return ret; + }
/* Now wait for internal filesystem counter */ sb->s_writers.frozen = SB_FREEZE_FS; @@ -1754,7 +1759,7 @@ int freeze_super(struct super_block *sb) printk(KERN_ERR "VFS:Filesystem freeze failed\n"); sb->s_writers.frozen = SB_UNFROZEN; - sb_freeze_unlock(sb); + sb_freeze_unlock(sb, SB_FREEZE_FS); wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); return ret; @@ -1805,7 +1810,7 @@ static int thaw_super_locked(struct super_block *sb) }
sb->s_writers.frozen = SB_UNFROZEN; - sb_freeze_unlock(sb); + sb_freeze_unlock(sb, SB_FREEZE_FS); out: wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb);
From: "Darrick J. Wong" djwong@kernel.org
[ Upstream commit dd5532a4994bfda0386eb2286ec00758cee08444 ]
Strangely, dquot_quota_sync ignores the return code from the ->sync_fs call, which means that quotacalls like Q_SYNC never see the error. This doesn't seem right, so fix that.
Signed-off-by: Darrick J. Wong djwong@kernel.org Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Christoph Hellwig hch@lst.de Acked-by: Christian Brauner brauner@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/quota/dquot.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 4f13734637660..09fb8459bb5ce 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -692,9 +692,14 @@ int dquot_quota_sync(struct super_block *sb, int type) /* This is not very clever (and fast) but currently I don't know about * any other simple way of getting quota data to disk and we must get * them there for userspace to be visible... */ - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 1); - sync_blockdev(sb->s_bdev); + if (sb->s_op->sync_fs) { + ret = sb->s_op->sync_fs(sb, 1); + if (ret) + return ret; + } + ret = sync_blockdev(sb->s_bdev); + if (ret) + return ret;
/* * Now when everything is written we can discard the pagecache so
From: Su Yue l@damenly.su
[ Upstream commit ea1d1ca4025ac6c075709f549f9aa036b5b6597d ]
Check item size before accessing the device item to avoid out of bound access, similar to inode_item check.
Signed-off-by: Su Yue l@damenly.su Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/tree-checker.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index d4a3a56726aa8..4a5ee516845f7 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -947,6 +947,7 @@ static int check_dev_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { struct btrfs_dev_item *ditem; + const u32 item_size = btrfs_item_size(leaf, slot);
if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) { dev_item_err(leaf, slot, @@ -954,6 +955,13 @@ static int check_dev_item(struct extent_buffer *leaf, key->objectid, BTRFS_DEV_ITEMS_OBJECTID); return -EUCLEAN; } + + if (unlikely(item_size != sizeof(*ditem))) { + dev_item_err(leaf, slot, "invalid item size: has %u expect %zu", + item_size, sizeof(*ditem)); + return -EUCLEAN; + } + ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); if (btrfs_device_id(leaf, ditem) != key->offset) { dev_item_err(leaf, slot,
On Wed, Feb 09, 2022 at 01:40:52PM -0500, Sasha Levin wrote:
From: Su Yue l@damenly.su
[ Upstream commit ea1d1ca4025ac6c075709f549f9aa036b5b6597d ]
Check item size before accessing the device item to avoid out of bound access, similar to inode_item check.
Signed-off-by: Su Yue l@damenly.su Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org
fs/btrfs/tree-checker.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index d4a3a56726aa8..4a5ee516845f7 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -947,6 +947,7 @@ static int check_dev_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { struct btrfs_dev_item *ditem;
- const u32 item_size = btrfs_item_size(leaf, slot);
if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) { dev_item_err(leaf, slot, @@ -954,6 +955,13 @@ static int check_dev_item(struct extent_buffer *leaf, key->objectid, BTRFS_DEV_ITEMS_OBJECTID); return -EUCLEAN; }
- if (unlikely(item_size != sizeof(*ditem))) {
dev_item_err(leaf, slot, "invalid item size: has %u expect %zu",
item_size, sizeof(*ditem));
return -EUCLEAN;
- }
- ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); if (btrfs_device_id(leaf, ditem) != key->offset) { dev_item_err(leaf, slot,
-- 2.34.1
This adds a build warning, showing that the backport is not correct, so I'll go drop this :(
thanks,
greg k-h
On Fri 18 Feb 2022 at 11:36, Greg KH greg@kroah.com wrote:
On Wed, Feb 09, 2022 at 01:40:52PM -0500, Sasha Levin wrote:
From: Su Yue l@damenly.su
[ Upstream commit ea1d1ca4025ac6c075709f549f9aa036b5b6597d ]
Check item size before accessing the device item to avoid out of bound access, similar to inode_item check.
Signed-off-by: Su Yue l@damenly.su Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org
fs/btrfs/tree-checker.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index d4a3a56726aa8..4a5ee516845f7 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -947,6 +947,7 @@ static int check_dev_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { struct btrfs_dev_item *ditem;
const u32 item_size = btrfs_item_size(leaf, slot);
if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) { dev_item_err(leaf, slot,
@@ -954,6 +955,13 @@ static int check_dev_item(struct extent_buffer *leaf, key->objectid, BTRFS_DEV_ITEMS_OBJECTID); return -EUCLEAN; }
- if (unlikely(item_size != sizeof(*ditem))) {
dev_item_err(leaf, slot, "invalid item size: has
%u expect %zu",
item_size, sizeof(*ditem));
return -EUCLEAN;
- }
- ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); if (btrfs_device_id(leaf, ditem) != key->offset) { dev_item_err(leaf, slot,
-- 2.34.1
This adds a build warning, showing that the backport is not correct, so I'll go drop this :(
And the warning is ======================================================================== arch/x86/kernel/head_64.o: warning: objtool: .text+0x5: unreachable instruction fs/btrfs/tree-checker.c: In function \342\200\230check_dev_item\342\200\231: fs/btrfs/tree-checker.c:950:53: warning: passing argument 2 of \342\200\230btrfs_item_size\342\200\231 makes pointer from integer without a cast [-Wint-conversion] 950 | const u32 item_size = btrfs_item_size(leaf, slot); | ^~~~ | | | int In file included from fs/btrfs/tree-checker.c:21: fs/btrfs/ctree.h:1474:48: note: expected \342\200\230const struct btrfs_item *\342\200\231 but argument is of type \342\200\230int\342\200\231 1474 | const type *s) \ | ~~~~~~~~~~~~^ fs/btrfs/ctree.h:1833:1: note: in expansion of macro \342\200\230BTRFS_SETGET_FUNCS\342\200\231 1833 | BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32); | ^~~~~~~~~~~~~~~~~~ ========================================================================
The upstream patchset[1] merged in 5.17-rc1, changed second parameter of btrfs_item_size() from btrfs_item * to int directly. So yes, the backport is wrong.
I'm not familiar with stable backport progress. Should I file a patch using btrfs_item *? Or just drop it?
The patch is related to 0c982944af27d131d3b74242f3528169f66950ad but I wonder why the 0c98294 is not selected automatically.
Thanks.
[1]: https://patchwork.kernel.org/project/linux-btrfs/cover/cover.1634842475.git.... -- Su
thanks,
greg k-h
On Fri, Feb 18, 2022 at 07:25:20PM +0800, Su Yue wrote:
On Fri 18 Feb 2022 at 11:36, Greg KH greg@kroah.com wrote:
On Wed, Feb 09, 2022 at 01:40:52PM -0500, Sasha Levin wrote:
From: Su Yue l@damenly.su
[ Upstream commit ea1d1ca4025ac6c075709f549f9aa036b5b6597d ]
Check item size before accessing the device item to avoid out of bound access, similar to inode_item check.
Signed-off-by: Su Yue l@damenly.su Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org
fs/btrfs/tree-checker.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index d4a3a56726aa8..4a5ee516845f7 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -947,6 +947,7 @@ static int check_dev_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { struct btrfs_dev_item *ditem;
const u32 item_size = btrfs_item_size(leaf, slot);
if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) { dev_item_err(leaf, slot,
@@ -954,6 +955,13 @@ static int check_dev_item(struct extent_buffer *leaf, key->objectid, BTRFS_DEV_ITEMS_OBJECTID); return -EUCLEAN; }
- if (unlikely(item_size != sizeof(*ditem))) {
dev_item_err(leaf, slot, "invalid item size: has %u expect %zu",
item_size, sizeof(*ditem));
return -EUCLEAN;
- }
- ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); if (btrfs_device_id(leaf, ditem) != key->offset) { dev_item_err(leaf, slot,
-- 2.34.1
This adds a build warning, showing that the backport is not correct, so I'll go drop this :(
And the warning is
arch/x86/kernel/head_64.o: warning: objtool: .text+0x5: unreachable instruction fs/btrfs/tree-checker.c: In function \342\200\230check_dev_item\342\200\231: fs/btrfs/tree-checker.c:950:53: warning: passing argument 2 of \342\200\230btrfs_item_size\342\200\231 makes pointer from integer without a cast [-Wint-conversion] 950 | const u32 item_size = btrfs_item_size(leaf, slot); | ^~~~ | | | int In file included from fs/btrfs/tree-checker.c:21: fs/btrfs/ctree.h:1474:48: note: expected \342\200\230const struct btrfs_item *\342\200\231 but argument is of type \342\200\230int\342\200\231 1474 | const type *s) \ | ~~~~~~~~~~~~^ fs/btrfs/ctree.h:1833:1: note: in expansion of macro \342\200\230BTRFS_SETGET_FUNCS\342\200\231 1833 | BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32); | ^~~~~~~~~~~~~~~~~~ ========================================================================
The upstream patchset[1] merged in 5.17-rc1, changed second parameter of btrfs_item_size() from btrfs_item * to int directly. So yes, the backport is wrong.
I'm not familiar with stable backport progress. Should I file a patch using btrfs_item *? Or just drop it?
If you think this needs to be in the stable tree, yes please backport it and send it to us.
The patch is related to 0c982944af27d131d3b74242f3528169f66950ad but I wonder why the 0c98294 is not selected automatically.
No idea, if you think that is needed to, please send it to us.
thanks,
greg k-h
On Fri, Feb 18, 2022 at 07:25:20PM +0800, Su Yue wrote:
On Fri 18 Feb 2022 at 11:36, Greg KH greg@kroah.com wrote:
On Wed, Feb 09, 2022 at 01:40:52PM -0500, Sasha Levin wrote:
\ | ~~~~~~~~~~~~^ fs/btrfs/ctree.h:1833:1: note: in expansion of macro \342\200\230BTRFS_SETGET_FUNCS\342\200\231 1833 | BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32); | ^~~~~~~~~~~~~~~~~~ ========================================================================
The upstream patchset[1] merged in 5.17-rc1, changed second parameter of btrfs_item_size() from btrfs_item * to int directly. So yes, the backport is wrong.
I'm not familiar with stable backport progress. Should I file a patch using btrfs_item *? Or just drop it?
The patch is related to 0c982944af27d131d3b74242f3528169f66950ad but I wonder why the 0c98294 is not selected automatically.
We don't rely on the automatic selection, I evaluate all patches for stable inclusion and add the CC: tag, this works well. Not all patches need to go to stable, but AUTOSEL sometimes picks patches that could be there and if it's not entirely wrong I don't object.
From: Vijayanand Jitta quic_vjitta@quicinc.com
[ Upstream commit b54240ad494300ff0994c4539a531727874381f4 ]
Kasan has reported the following use after free on dev->iommu. when a device probe fails and it is in process of freeing dev->iommu in dev_iommu_free function, a deferred_probe_work_func runs in parallel and tries to access dev->iommu->fwspec in of_iommu_configure path thus causing use after free.
BUG: KASAN: use-after-free in of_iommu_configure+0xb4/0x4a4 Read of size 8 at addr ffffff87a2f1acb8 by task kworker/u16:2/153
Workqueue: events_unbound deferred_probe_work_func Call trace: dump_backtrace+0x0/0x33c show_stack+0x18/0x24 dump_stack_lvl+0x16c/0x1e0 print_address_description+0x84/0x39c __kasan_report+0x184/0x308 kasan_report+0x50/0x78 __asan_load8+0xc0/0xc4 of_iommu_configure+0xb4/0x4a4 of_dma_configure_id+0x2fc/0x4d4 platform_dma_configure+0x40/0x5c really_probe+0x1b4/0xb74 driver_probe_device+0x11c/0x228 __device_attach_driver+0x14c/0x304 bus_for_each_drv+0x124/0x1b0 __device_attach+0x25c/0x334 device_initial_probe+0x24/0x34 bus_probe_device+0x78/0x134 deferred_probe_work_func+0x130/0x1a8 process_one_work+0x4c8/0x970 worker_thread+0x5c8/0xaec kthread+0x1f8/0x220 ret_from_fork+0x10/0x18
Allocated by task 1: ____kasan_kmalloc+0xd4/0x114 __kasan_kmalloc+0x10/0x1c kmem_cache_alloc_trace+0xe4/0x3d4 __iommu_probe_device+0x90/0x394 probe_iommu_group+0x70/0x9c bus_for_each_dev+0x11c/0x19c bus_iommu_probe+0xb8/0x7d4 bus_set_iommu+0xcc/0x13c arm_smmu_bus_init+0x44/0x130 [arm_smmu] arm_smmu_device_probe+0xb88/0xc54 [arm_smmu] platform_drv_probe+0xe4/0x13c really_probe+0x2c8/0xb74 driver_probe_device+0x11c/0x228 device_driver_attach+0xf0/0x16c __driver_attach+0x80/0x320 bus_for_each_dev+0x11c/0x19c driver_attach+0x38/0x48 bus_add_driver+0x1dc/0x3a4 driver_register+0x18c/0x244 __platform_driver_register+0x88/0x9c init_module+0x64/0xff4 [arm_smmu] do_one_initcall+0x17c/0x2f0 do_init_module+0xe8/0x378 load_module+0x3f80/0x4a40 __se_sys_finit_module+0x1a0/0x1e4 __arm64_sys_finit_module+0x44/0x58 el0_svc_common+0x100/0x264 do_el0_svc+0x38/0xa4 el0_svc+0x20/0x30 el0_sync_handler+0x68/0xac el0_sync+0x160/0x180
Freed by task 1: kasan_set_track+0x4c/0x84 kasan_set_free_info+0x28/0x4c ____kasan_slab_free+0x120/0x15c __kasan_slab_free+0x18/0x28 slab_free_freelist_hook+0x204/0x2fc kfree+0xfc/0x3a4 __iommu_probe_device+0x284/0x394 probe_iommu_group+0x70/0x9c bus_for_each_dev+0x11c/0x19c bus_iommu_probe+0xb8/0x7d4 bus_set_iommu+0xcc/0x13c arm_smmu_bus_init+0x44/0x130 [arm_smmu] arm_smmu_device_probe+0xb88/0xc54 [arm_smmu] platform_drv_probe+0xe4/0x13c really_probe+0x2c8/0xb74 driver_probe_device+0x11c/0x228 device_driver_attach+0xf0/0x16c __driver_attach+0x80/0x320 bus_for_each_dev+0x11c/0x19c driver_attach+0x38/0x48 bus_add_driver+0x1dc/0x3a4 driver_register+0x18c/0x244 __platform_driver_register+0x88/0x9c init_module+0x64/0xff4 [arm_smmu] do_one_initcall+0x17c/0x2f0 do_init_module+0xe8/0x378 load_module+0x3f80/0x4a40 __se_sys_finit_module+0x1a0/0x1e4 __arm64_sys_finit_module+0x44/0x58 el0_svc_common+0x100/0x264 do_el0_svc+0x38/0xa4 el0_svc+0x20/0x30 el0_sync_handler+0x68/0xac el0_sync+0x160/0x180
Fix this by setting dev->iommu to NULL first and then freeing dev_iommu structure in dev_iommu_free function.
Suggested-by: Robin Murphy robin.murphy@arm.com Signed-off-by: Vijayanand Jitta quic_vjitta@quicinc.com Link: https://lore.kernel.org/r/1643613155-20215-1-git-send-email-quic_vjitta@quic... Signed-off-by: Joerg Roedel jroedel@suse.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/iommu/iommu.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index bcf060b5cf85b..9d65557dfb2ce 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -185,9 +185,14 @@ static struct dev_iommu *dev_iommu_get(struct device *dev)
static void dev_iommu_free(struct device *dev) { - iommu_fwspec_free(dev); - kfree(dev->iommu); + struct dev_iommu *param = dev->iommu; + dev->iommu = NULL; + if (param->fwspec) { + fwnode_handle_put(param->fwspec->iommu_fwnode); + kfree(param->fwspec); + } + kfree(param); }
static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
From: John Garry john.garry@huawei.com
[ Upstream commit 61f162aa4381845acbdc7f2be4dfb694d027c018 ]
Currently a use-after-free may occur if a TMF sas_task is aborted before we handle the IO completion in mpi_ssp_completion(). The abort occurs due to timeout.
When the timeout occurs, the SAS_TASK_STATE_ABORTED flag is set and the sas_task is freed in pm8001_exec_internal_tmf_task().
However, if the I/O completion occurs later, the I/O completion still thinks that the sas_task is available. Fix this by clearing the ccb->task if the TMF times out - the I/O completion handler does nothing if this pointer is cleared.
Link: https://lore.kernel.org/r/1643289172-165636-3-git-send-email-john.garry@huaw... Reviewed-by: Damien Le Moal damien.lemoal@opensource.wdc.com Acked-by: Jack Wang jinpu.wang@ionos.com Signed-off-by: John Garry john.garry@huawei.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/pm8001/pm8001_sas.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index c3bb58885033b..75ac4d86d9c4b 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -753,8 +753,13 @@ static int pm8001_exec_internal_tmf_task(struct domain_device *dev, res = -TMF_RESP_FUNC_FAILED; /* Even TMF timed out, return direct. */ if (task->task_state_flags & SAS_TASK_STATE_ABORTED) { + struct pm8001_ccb_info *ccb = task->lldd_task; + pm8001_dbg(pm8001_ha, FAIL, "TMF task[%x]timeout.\n", tmf->tmf); + + if (ccb) + ccb->task = NULL; goto ex_err; }
From: John Garry john.garry@huawei.com
[ Upstream commit df7abcaa1246e2537ab4016077b5443bb3c09378 ]
Currently a use-after-free may occur if a sas_task is aborted by the upper layer before we handle the I/O completion in mpi_ssp_completion() or mpi_sata_completion().
In this case, the following are the two steps in handling those I/O completions:
- Call complete() to inform the upper layer handler of completion of the I/O.
- Release driver resources associated with the sas_task in pm8001_ccb_task_free() call.
When complete() is called, the upper layer may free the sas_task. As such, we should not touch the associated sas_task afterwards, but we do so in the pm8001_ccb_task_free() call.
Fix by swapping the complete() and pm8001_ccb_task_free() calls ordering.
Link: https://lore.kernel.org/r/1643289172-165636-4-git-send-email-john.garry@huaw... Reviewed-by: Damien Le Moal damien.lemoal@opensource.wdc.com Acked-by: Jack Wang jinpu.wang@ionos.com Signed-off-by: John Garry john.garry@huawei.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/pm8001/pm80xx_hwi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index a203a4fc2674a..b08e285e7fff6 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -2133,9 +2133,9 @@ mpi_ssp_completion(struct pm8001_hba_info *pm8001_ha , void *piomb) pm8001_dbg(pm8001_ha, FAIL, "task 0x%p done with io_status 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n", t, status, ts->resp, ts->stat); + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); if (t->slow_task) complete(&t->slow_task->completion); - pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); } else { spin_unlock_irqrestore(&t->task_state_lock, flags); pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); @@ -2726,9 +2726,9 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) pm8001_dbg(pm8001_ha, FAIL, "task 0x%p done with io_status 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n", t, status, ts->resp, ts->stat); + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); if (t->slow_task) complete(&t->slow_task->completion); - pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); } else { spin_unlock_irqrestore(&t->task_state_lock, flags); pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag);
From: Sagi Grimberg sagi@grimberg.me
[ Upstream commit 0fa0f99fc84e41057cbdd2efbfe91c6b2f47dd9d ]
Unlike .queue_rq, in .submit_async_event drivers may not check the ctrl readiness for AER submission. This may lead to a use-after-free condition that was observed with nvme-tcp.
The race condition may happen in the following scenario: 1. driver executes its reset_ctrl_work 2. -> nvme_stop_ctrl - flushes ctrl async_event_work 3. ctrl sends AEN which is received by the host, which in turn schedules AEN handling 4. teardown admin queue (which releases the queue socket) 5. AEN processed, submits another AER, calling the driver to submit 6. driver attempts to send the cmd ==> use-after-free
In order to fix that, add ctrl state check to validate the ctrl is actually able to accept the AER submission.
This addresses the above race in controller resets because the driver during teardown should: 1. change ctrl state to RESETTING 2. flush async_event_work (as well as other async work elements)
So after 1,2, any other AER command will find the ctrl state to be RESETTING and bail out without submitting the AER.
Signed-off-by: Sagi Grimberg sagi@grimberg.me Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 99b5152482fe4..71c85c99e86c6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4259,7 +4259,14 @@ static void nvme_async_event_work(struct work_struct *work) container_of(work, struct nvme_ctrl, async_event_work);
nvme_aen_uevent(ctrl); - ctrl->ops->submit_async_event(ctrl); + + /* + * The transport drivers must guarantee AER submission here is safe by + * flushing ctrl async_event_work after changing the controller state + * from LIVE and before freeing the admin queue. + */ + if (ctrl->state == NVME_CTRL_LIVE) + ctrl->ops->submit_async_event(ctrl); }
static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl)
From: Sagi Grimberg sagi@grimberg.me
[ Upstream commit ff9fc7ebf5c06de1ef72a69f9b1ab40af8b07f9e ]
While nvme_tcp_submit_async_event_work is checking the ctrl and queue state before preparing the AER command and scheduling io_work, in order to fully prevent a race where this check is not reliable the error recovery work must flush async_event_work before continuing to destroy the admin queue after setting the ctrl state to RESETTING such that there is no race .submit_async_event and the error recovery handler itself changing the ctrl state.
Tested-by: Chris Leech cleech@redhat.com Signed-off-by: Sagi Grimberg sagi@grimberg.me Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/tcp.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index e99d439894187..45b3a919ac281 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2069,6 +2069,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
nvme_stop_keep_alive(ctrl); + flush_work(&ctrl->async_event_work); nvme_tcp_teardown_io_queues(ctrl, false); /* unquiesce to fail fast pending requests */ nvme_start_queues(ctrl);
From: Sagi Grimberg sagi@grimberg.me
[ Upstream commit b6bb1722f34bbdbabed27acdceaf585d300c5fd2 ]
While nvme_rdma_submit_async_event_work is checking the ctrl and queue state before preparing the AER command and scheduling io_work, in order to fully prevent a race where this check is not reliable the error recovery work must flush async_event_work before continuing to destroy the admin queue after setting the ctrl state to RESETTING such that there is no race .submit_async_event and the error recovery handler itself changing the ctrl state.
Signed-off-by: Sagi Grimberg sagi@grimberg.me Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/rdma.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 1b90563818434..8eacc9bd58f5a 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1200,6 +1200,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) struct nvme_rdma_ctrl, err_work);
nvme_stop_keep_alive(&ctrl->ctrl); + flush_work(&ctrl->ctrl.async_event_work); nvme_rdma_teardown_io_queues(ctrl, false); nvme_start_queues(&ctrl->ctrl); nvme_rdma_teardown_admin_queue(ctrl, false);
From: Christian König christian.koenig@amd.com
[ Upstream commit e8ae38720e1a685fd98cfa5ae118c9d07b45ca79 ]
We probably never trigger this, but the logic inside the check is inverted.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Felix Kuehling Felix.Kuehling@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 5207ad654f18e..0b162928a248b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -2120,7 +2120,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, unsigned i; int r;
- if (direct_submit && !ring->sched.ready) { + if (!direct_submit && !ring->sched.ready) { DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; }
From: Jan Beulich jbeulich@suse.com
[ Upstream commit e25a8d959992f61b64a58fc62fb7951dc6f31d1f ]
This started out with me noticing that "dom0_max_vcpus=<N>" with <N> larger than the number of physical CPUs reported through ACPI tables would not bring up the "excess" vCPU-s. Addressing this is the primary purpose of the change; CPU maps handling is being tidied only as far as is necessary for the change here (with the effect of also avoiding the setting up of too much per-CPU infrastructure, i.e. for CPUs which can never come online).
Noticing that xen_fill_possible_map() is called way too early, whereas xen_filter_cpu_maps() is called too late (after per-CPU areas were already set up), and further observing that each of the functions serves only one of Dom0 or DomU, it looked like it was better to simplify this. Use the .get_smp_config hook instead, uniformly for Dom0 and DomU. xen_fill_possible_map() can be dropped altogether, while xen_filter_cpu_maps() is re-purposed but not otherwise changed.
Signed-off-by: Jan Beulich jbeulich@suse.com Reviewed-by: Boris Ostrovsky boris.ostrovsky@oracle.com Link: https://lore.kernel.org/r/2dbd5f0a-9859-ca2d-085e-a02f7166c610@suse.com Signed-off-by: Juergen Gross jgross@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/xen/enlighten_pv.c | 4 ---- arch/x86/xen/smp_pv.c | 26 ++++++-------------------- 2 files changed, 6 insertions(+), 24 deletions(-)
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 16ff25d6935e7..804c65d2b95f3 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1387,10 +1387,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_acpi_sleep_register();
- /* Avoid searching for BIOS MP tables */ - x86_init.mpparse.find_smp_config = x86_init_noop; - x86_init.mpparse.get_smp_config = x86_init_uint_noop; - xen_boot_params_init_edd();
#ifdef CONFIG_ACPI diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index c2ac319f11a4b..8f9e7e2407c87 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -149,28 +149,12 @@ int xen_smp_intr_init_pv(unsigned int cpu) return rc; }
-static void __init xen_fill_possible_map(void) -{ - int i, rc; - - if (xen_initial_domain()) - return; - - for (i = 0; i < nr_cpu_ids; i++) { - rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); - if (rc >= 0) { - num_processors++; - set_cpu_possible(i, true); - } - } -} - -static void __init xen_filter_cpu_maps(void) +static void __init _get_smp_config(unsigned int early) { int i, rc; unsigned int subtract = 0;
- if (!xen_initial_domain()) + if (early) return;
num_processors = 0; @@ -211,7 +195,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void) * sure the old memory can be recycled. */ make_lowmem_page_readwrite(xen_initial_gdt);
- xen_filter_cpu_maps(); xen_setup_vcpu_info_placement();
/* @@ -491,5 +474,8 @@ static const struct smp_ops xen_smp_ops __initconst = { void __init xen_smp_init(void) { smp_ops = xen_smp_ops; - xen_fill_possible_map(); + + /* Avoid searching for BIOS MP tables */ + x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.get_smp_config = _get_smp_config; }
From: Igor Pylypiv ipylypiv@google.com
[ Upstream commit 67d6212afda218d564890d1674bab28e8612170f ]
This reverts commit 774a1221e862b343388347bac9b318767336b20b.
We need to finish all async code before the module init sequence is done. In the reverted commit the PF_USED_ASYNC flag was added to mark a thread that called async_schedule(). Then the PF_USED_ASYNC flag was used to determine whether or not async_synchronize_full() needs to be invoked. This works when modprobe thread is calling async_schedule(), but it does not work if module dispatches init code to a worker thread which then calls async_schedule().
For example, PCI driver probing is invoked from a worker thread based on a node where device is attached:
if (cpu < nr_cpu_ids) error = work_on_cpu(cpu, local_pci_probe, &ddi); else error = local_pci_probe(&ddi);
We end up in a situation where a worker thread gets the PF_USED_ASYNC flag set instead of the modprobe thread. As a result, async_synchronize_full() is not invoked and modprobe completes without waiting for the async code to finish.
The issue was discovered while loading the pm80xx driver: (scsi_mod.scan=async)
modprobe pm80xx worker ... do_init_module() ... pci_call_probe() work_on_cpu(local_pci_probe) local_pci_probe() pm8001_pci_probe() scsi_scan_host() async_schedule() worker->flags |= PF_USED_ASYNC; ... < return from worker > ... if (current->flags & PF_USED_ASYNC) <--- false async_synchronize_full();
Commit 21c3c5d28007 ("block: don't request module during elevator init") fixed the deadlock issue which the reverted commit 774a1221e862 ("module, async: async_synchronize_full() on module init iff async is used") tried to fix.
Since commit 0fdff3ec6d87 ("async, kmod: warn on synchronous request_module() from async workers") synchronous module loading from async is not allowed.
Given that the original deadlock issue is fixed and it is no longer allowed to call synchronous request_module() from async we can remove PF_USED_ASYNC flag to make module init consistently invoke async_synchronize_full() unless async module probe is requested.
Signed-off-by: Igor Pylypiv ipylypiv@google.com Reviewed-by: Changyuan Lyu changyuanl@google.com Reviewed-by: Luis Chamberlain mcgrof@kernel.org Acked-by: Tejun Heo tj@kernel.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- include/linux/sched.h | 1 - kernel/async.c | 3 --- kernel/module.c | 25 +++++-------------------- 3 files changed, 5 insertions(+), 24 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h index b85b26d9ccefe..f996d1f343bb7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1544,7 +1544,6 @@ extern struct pid *cad_pid; #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ -#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF_FROZEN 0x00010000 /* Frozen for system suspend */ #define PF_KSWAPD 0x00020000 /* I am kswapd */ diff --git a/kernel/async.c b/kernel/async.c index 33258e6e20f83..1746cd65e271b 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -205,9 +205,6 @@ async_cookie_t async_schedule_node_domain(async_func_t func, void *data, atomic_inc(&entry_count); spin_unlock_irqrestore(&async_lock, flags);
- /* mark that this task has queued an async job, used by module init */ - current->flags |= PF_USED_ASYNC; - /* schedule for execution */ queue_work_node(node, system_unbound_wq, &entry->work);
diff --git a/kernel/module.c b/kernel/module.c index 185b2655bc206..5f4403198f04b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3714,12 +3714,6 @@ static noinline int do_init_module(struct module *mod) } freeinit->module_init = mod->init_layout.base;
- /* - * We want to find out whether @mod uses async during init. Clear - * PF_USED_ASYNC. async_schedule*() will set it. - */ - current->flags &= ~PF_USED_ASYNC; - do_mod_ctors(mod); /* Start the module */ if (mod->init != NULL) @@ -3745,22 +3739,13 @@ static noinline int do_init_module(struct module *mod)
/* * We need to finish all async code before the module init sequence - * is done. This has potential to deadlock. For example, a newly - * detected block device can trigger request_module() of the - * default iosched from async probing task. Once userland helper - * reaches here, async_synchronize_full() will wait on the async - * task waiting on request_module() and deadlock. - * - * This deadlock is avoided by perfomring async_synchronize_full() - * iff module init queued any async jobs. This isn't a full - * solution as it will deadlock the same if module loading from - * async jobs nests more than once; however, due to the various - * constraints, this hack seems to be the best option for now. - * Please refer to the following thread for details. + * is done. This has potential to deadlock if synchronous module + * loading is requested from async (which is not allowed!). * - * http://thread.gmane.org/gmane.linux.kernel/1420814 + * See commit 0fdff3ec6d87 ("async, kmod: warn on synchronous + * request_module() from async workers") for more details. */ - if (!mod->async_probe_requested && (current->flags & PF_USED_ASYNC)) + if (!mod->async_probe_requested) async_synchronize_full();
ftrace_free_mem(mod, mod->init_layout.base, mod->init_layout.base +
From: Kees Cook keescook@chromium.org
[ Upstream commit dcb85f85fa6f142aae1fe86f399d4503d49f2b60 ]
While the stackleak plugin was already using notrace, objtool is now a bit more picky. Update the notrace uses to noinstr. Silences the following objtool warnings when building with:
CONFIG_DEBUG_ENTRY=y CONFIG_STACK_VALIDATION=y CONFIG_VMLINUX_VALIDATION=y CONFIG_GCC_PLUGIN_STACKLEAK=y
vmlinux.o: warning: objtool: do_syscall_64()+0x9: call to stackleak_track_stack() leaves .noinstr.text section vmlinux.o: warning: objtool: do_int80_syscall_32()+0x9: call to stackleak_track_stack() leaves .noinstr.text section vmlinux.o: warning: objtool: exc_general_protection()+0x22: call to stackleak_track_stack() leaves .noinstr.text section vmlinux.o: warning: objtool: fixup_bad_iret()+0x20: call to stackleak_track_stack() leaves .noinstr.text section vmlinux.o: warning: objtool: do_machine_check()+0x27: call to stackleak_track_stack() leaves .noinstr.text section vmlinux.o: warning: objtool: .text+0x5346e: call to stackleak_erase() leaves .noinstr.text section vmlinux.o: warning: objtool: .entry.text+0x143: call to stackleak_erase() leaves .noinstr.text section vmlinux.o: warning: objtool: .entry.text+0x10eb: call to stackleak_erase() leaves .noinstr.text section vmlinux.o: warning: objtool: .entry.text+0x17f9: call to stackleak_erase() leaves .noinstr.text section
Note that the plugin's addition of calls to stackleak_track_stack() from noinstr functions is expected to be safe, as it isn't runtime instrumentation and is self-contained.
Cc: Alexander Popov alex.popov@linux.com Suggested-by: Peter Zijlstra peterz@infradead.org Signed-off-by: Kees Cook keescook@chromium.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/stackleak.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/kernel/stackleak.c b/kernel/stackleak.c index ce161a8e8d975..dd07239ddff9f 100644 --- a/kernel/stackleak.c +++ b/kernel/stackleak.c @@ -48,7 +48,7 @@ int stack_erasing_sysctl(struct ctl_table *table, int write, #define skip_erasing() false #endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */
-asmlinkage void notrace stackleak_erase(void) +asmlinkage void noinstr stackleak_erase(void) { /* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */ unsigned long kstack_ptr = current->lowest_stack; @@ -102,9 +102,8 @@ asmlinkage void notrace stackleak_erase(void) /* Reset the 'lowest_stack' value for the next syscall */ current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64; } -NOKPROBE_SYMBOL(stackleak_erase);
-void __used __no_caller_saved_registers notrace stackleak_track_stack(void) +void __used __no_caller_saved_registers noinstr stackleak_track_stack(void) { unsigned long sp = current_stack_pointer;
From: "Jason A. Donenfeld" Jason@zx2c4.com
[ Upstream commit 042e293e16e3aa9794ce60c29f5b7b0c8170f933 ]
When account() is called, and the amount of entropy dips below random_write_wakeup_bits, we wake up the random writers, so that they can write some more in. However, the RNDZAPENTCNT/RNDCLEARPOOL ioctl sets the entropy count to zero -- a potential reduction just like account() -- but does not unblock writers. This commit adds the missing logic to that ioctl to unblock waiting writers.
Reviewed-by: Dominik Brodowski linux@dominikbrodowski.net Signed-off-by: Jason A. Donenfeld Jason@zx2c4.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/char/random.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/char/random.c b/drivers/char/random.c index 5444206f35e22..5f541c9465598 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1987,7 +1987,10 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; - input_pool.entropy_count = 0; + if (xchg(&input_pool.entropy_count, 0) && random_write_wakeup_bits) { + wake_up_interruptible(&random_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); + } return 0; case RNDRESEEDCRNG: if (!capable(CAP_SYS_ADMIN))
linux-stable-mirror@lists.linaro.org