Sasha, Greg,
Can you please backport CONFIG_LEGACY_TIOCSTI support into stable
kernels?
This, perhaps, would include there mainline commits:
83efeeeb3d04b22aaed1df99bc70a48fe9d22c4d tty: Allow TIOCSTI to be disabled
5c30f3e4a6e67c88c979ad30554bf4ef9b24fbd0 tty: Move TIOCSTI toggle variable before kerndoc
b2ea273a477cd6e83daedbfa1981cd1a7468f73a tty: Fix typo in LEGACY_TIOCSTI Kconfig description
690c8b804ad2eafbd35da5d3c95ad325ca7d5061 TIOCSTI: always enable for CAP_SYS_ADMIN
3f29d9ee323ae5cda59d144d1f8b0b10ea065be0 TIOCSTI: Document CAP_SYS_ADMIN behaviour in Kconfig
8d1b43f6a6df7bcea20982ad376a000d90906b42 tty: Restrict access to TIOCLINUX' copy-and-paste subcommands
Thanks,
We cannot use CLONE_VFORK because we also need to wait for the timeout
signal.
Restore tests timeout by using the original fork() call in __run_test()
but also in __TEST_F_IMPL(). Also fix a race condition when waiting for
the test child process.
Because test metadata are shared between test processes, only the
parent process must set the test PID (child). Otherwise, t->pid may be
set to zero, leading to inconsistent error cases:
# RUN layout1.rule_on_mountpoint ...
# rule_on_mountpoint: Test ended in some other way [127]
# OK layout1.rule_on_mountpoint
ok 20 layout1.rule_on_mountpoint
As safeguards, initialize the "status" variable with a valid exit code,
and handle unknown test exits as errors.
The use of fork() introduces a new race condition in landlock/fs_test.c
which seems to be specific to hostfs bind mounts, but I haven't found
the root cause and it's difficult to trigger. I'll try to fix it with
another patch.
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Günther Noack <gnoack(a)google.com>
Cc: Jakub Kicinski <kuba(a)kernel.org>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Mark Brown <broonie(a)kernel.org>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Will Drewry <wad(a)chromium.org>
Cc: stable(a)vger.kernel.org
Closes: https://lore.kernel.org/r/9341d4db-5e21-418c-bf9e-9ae2da7877e1@sirena.org.uk
Fixes: a86f18903db9 ("selftests/harness: Fix interleaved scheduling leading to race conditions")
Fixes: 24cf65a62266 ("selftests/harness: Share _metadata between forked processes")
Signed-off-by: Mickaël Salaün <mic(a)digikod.net>
Link: https://lore.kernel.org/r/20240621180605.834676-1-mic@digikod.net
---
tools/testing/selftests/kselftest_harness.h | 43 ++++++++++++---------
1 file changed, 24 insertions(+), 19 deletions(-)
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index b634969cbb6f..40723a6a083f 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -66,8 +66,6 @@
#include <sys/wait.h>
#include <unistd.h>
#include <setjmp.h>
-#include <syscall.h>
-#include <linux/sched.h>
#include "kselftest.h"
@@ -82,17 +80,6 @@
# define TH_LOG_ENABLED 1
#endif
-/* Wait for the child process to end but without sharing memory mapping. */
-static inline pid_t clone3_vfork(void)
-{
- struct clone_args args = {
- .flags = CLONE_VFORK,
- .exit_signal = SIGCHLD,
- };
-
- return syscall(__NR_clone3, &args, sizeof(args));
-}
-
/**
* TH_LOG()
*
@@ -437,7 +424,7 @@ static inline pid_t clone3_vfork(void)
} \
if (setjmp(_metadata->env) == 0) { \
/* _metadata and potentially self are shared with all forks. */ \
- child = clone3_vfork(); \
+ child = fork(); \
if (child == 0) { \
fixture_name##_setup(_metadata, self, variant->data); \
/* Let setup failure terminate early. */ \
@@ -1016,7 +1003,14 @@ void __wait_for_test(struct __test_metadata *t)
.sa_flags = SA_SIGINFO,
};
struct sigaction saved_action;
- int status;
+ /*
+ * Sets status so that WIFEXITED(status) returns true and
+ * WEXITSTATUS(status) returns KSFT_FAIL. This safe default value
+ * should never be evaluated because of the waitpid(2) check and
+ * SIGALRM handling.
+ */
+ int status = KSFT_FAIL << 8;
+ int child;
if (sigaction(SIGALRM, &action, &saved_action)) {
t->exit_code = KSFT_FAIL;
@@ -1028,7 +1022,15 @@ void __wait_for_test(struct __test_metadata *t)
__active_test = t;
t->timed_out = false;
alarm(t->timeout);
- waitpid(t->pid, &status, 0);
+ child = waitpid(t->pid, &status, 0);
+ if (child == -1 && errno != EINTR) {
+ t->exit_code = KSFT_FAIL;
+ fprintf(TH_LOG_STREAM,
+ "# %s: Failed to wait for PID %d (errno: %d)\n",
+ t->name, t->pid, errno);
+ return;
+ }
+
alarm(0);
if (sigaction(SIGALRM, &saved_action, NULL)) {
t->exit_code = KSFT_FAIL;
@@ -1083,6 +1085,7 @@ void __wait_for_test(struct __test_metadata *t)
WTERMSIG(status));
}
} else {
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
"# %s: Test ended in some other way [%u]\n",
t->name,
@@ -1218,6 +1221,7 @@ void __run_test(struct __fixture_metadata *f,
struct __test_xfail *xfail;
char test_name[1024];
const char *diagnostic;
+ int child;
/* reset test struct */
t->exit_code = KSFT_PASS;
@@ -1236,15 +1240,16 @@ void __run_test(struct __fixture_metadata *f,
fflush(stdout);
fflush(stderr);
- t->pid = clone3_vfork();
- if (t->pid < 0) {
+ child = fork();
+ if (child < 0) {
ksft_print_msg("ERROR SPAWNING TEST CHILD\n");
t->exit_code = KSFT_FAIL;
- } else if (t->pid == 0) {
+ } else if (child == 0) {
setpgrp();
t->fn(t, variant);
_exit(t->exit_code);
} else {
+ t->pid = child;
__wait_for_test(t);
}
ksft_print_msg(" %4s %s\n",
base-commit: 83a7eefedc9b56fe7bfeff13b6c7356688ffa670
--
2.45.2
From: Thomas Weißschuh <linux(a)weissschuh.net>
The cell sysfs attribute should not provide more access to the nvmem
data than the main attribute itself.
For example if nvme_config::root_only was set, the cell attribute
would still provide read access to everybody.
Mask out permissions not available on the main attribute.
Fixes: 0331c611949f ("nvmem: core: Expose cells through sysfs")
Cc: stable(a)vger.kernel.org
Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
---
drivers/nvmem/core.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index 1285300ed239..f8dd7eb40fbe 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -462,7 +462,7 @@ static int nvmem_populate_sysfs_cells(struct nvmem_device *nvmem)
"%s@%x,%x", entry->name,
entry->offset,
entry->bit_offset);
- attrs[i].attr.mode = 0444;
+ attrs[i].attr.mode = 0444 & nvmem_bin_attr_get_umode(nvmem);
attrs[i].size = entry->bytes;
attrs[i].read = &nvmem_cell_attr_read;
attrs[i].private = entry;
--
2.25.1
From: Thomas Weißschuh <linux(a)weissschuh.net>
bin_attr_nvmem_eeprom_compat is the template from which all future
compat attributes are created.
Changing it means to change all subsquent compat attributes, too.
Instead only use the "fram" name for the currently registered attribute.
Fixes: fd307a4ad332 ("nvmem: prepare basics for FRAM support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
---
drivers/nvmem/core.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index e1ec3b7200d7..1285300ed239 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -396,10 +396,9 @@ static int nvmem_sysfs_setup_compat(struct nvmem_device *nvmem,
if (!config->base_dev)
return -EINVAL;
- if (config->type == NVMEM_TYPE_FRAM)
- bin_attr_nvmem_eeprom_compat.attr.name = "fram";
-
nvmem->eeprom = bin_attr_nvmem_eeprom_compat;
+ if (config->type == NVMEM_TYPE_FRAM)
+ nvmem->eeprom.attr.name = "fram";
nvmem->eeprom.attr.mode = nvmem_bin_attr_get_umode(nvmem);
nvmem->eeprom.size = nvmem->size;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
--
2.25.1
From: Joy Chakraborty <joychakr(a)google.com>
reg_read() callback registered with nvmem core expects 0 on success and
a negative value on error but rmem_read() returns the number of bytes
read which is treated as an error at the nvmem core.
This does not break when rmem is accessed using sysfs via
bin_attr_nvmem_read()/write() but causes an error when accessed from
places like nvmem_access_with_keepouts(), etc.
Change to return 0 on success and error in case
memory_read_from_buffer() returns an error or -EIO if bytes read do not
match what was requested.
Fixes: 5a3fa75a4d9c ("nvmem: Add driver to expose reserved memory as nvmem")
Cc: stable(a)vger.kernel.org
Signed-off-by: Joy Chakraborty <joychakr(a)google.com>
Reviewed-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
---
drivers/nvmem/rmem.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/nvmem/rmem.c b/drivers/nvmem/rmem.c
index 752d0bf4445e..7f907c5a445e 100644
--- a/drivers/nvmem/rmem.c
+++ b/drivers/nvmem/rmem.c
@@ -46,7 +46,10 @@ static int rmem_read(void *context, unsigned int offset,
memunmap(addr);
- return count;
+ if (count < 0)
+ return count;
+
+ return count == bytes ? 0 : -EIO;
}
static int rmem_probe(struct platform_device *pdev)
--
2.25.1
From: yangge <yangge1116(a)126.com>
If a large number of CMA memory are configured in system (for
example, the CMA memory accounts for 50% of the system memory),
starting a SEV virtual machine will fail. During starting the SEV
virtual machine, it will call pin_user_pages_fast(..., FOLL_LONGTERM,
...) to pin memory. Normally if a page is present and in CMA area,
pin_user_pages_fast() will first call __get_user_pages_locked() to
pin the page in CMA area, and then call
check_and_migrate_movable_pages() to migrate the page from CMA area
to non-CMA area. But the current code calling __get_user_pages_locked()
will fail, because it call try_grab_folio() to pin page in gup slow
path.
The commit 57edfcfd3419 ("mm/gup: accelerate thp gup even for "pages
!= NULL"") uses try_grab_folio() in gup slow path, which seems to be
problematic because try_grap_folio() will check if the page can be
longterm pinned. This check may fail and cause __get_user_pages_lock()
to fail. However, these checks are not required in gup slow path,
seems we can use try_grab_page() instead of try_grab_folio(). In
addition, in the current code, try_grab_page() can only add 1 to the
page's refcount. We extend this function so that the page's refcount
can be increased according to the parameters passed in.
The following log reveals it:
[ 464.325306] WARNING: CPU: 13 PID: 6734 at mm/gup.c:1313 __get_user_pages+0x423/0x520
[ 464.325464] CPU: 13 PID: 6734 Comm: qemu-kvm Kdump: loaded Not tainted 6.6.33+ #6
[ 464.325477] RIP: 0010:__get_user_pages+0x423/0x520
[ 464.325515] Call Trace:
[ 464.325520] <TASK>
[ 464.325523] ? __get_user_pages+0x423/0x520
[ 464.325528] ? __warn+0x81/0x130
[ 464.325536] ? __get_user_pages+0x423/0x520
[ 464.325541] ? report_bug+0x171/0x1a0
[ 464.325549] ? handle_bug+0x3c/0x70
[ 464.325554] ? exc_invalid_op+0x17/0x70
[ 464.325558] ? asm_exc_invalid_op+0x1a/0x20
[ 464.325567] ? __get_user_pages+0x423/0x520
[ 464.325575] __gup_longterm_locked+0x212/0x7a0
[ 464.325583] internal_get_user_pages_fast+0xfb/0x190
[ 464.325590] pin_user_pages_fast+0x47/0x60
[ 464.325598] sev_pin_memory+0xca/0x170 [kvm_amd]
[ 464.325616] sev_mem_enc_register_region+0x81/0x130 [kvm_amd]
Fixes: 57edfcfd3419 ("mm/gup: accelerate thp gup even for "pages != NULL"")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: yangge <yangge1116(a)126.com>
---
mm/gup.c | 26 ++++++++++++--------------
mm/huge_memory.c | 2 +-
mm/internal.h | 2 +-
3 files changed, 14 insertions(+), 16 deletions(-)
diff --git a/mm/gup.c b/mm/gup.c
index 6ff9f95..bb58909 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -222,7 +222,7 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
* -ENOMEM FOLL_GET or FOLL_PIN was set, but the page could not
* be grabbed.
*/
-int __must_check try_grab_page(struct page *page, unsigned int flags)
+int __must_check try_grab_page(struct page *page, int refs, unsigned int flags)
{
struct folio *folio = page_folio(page);
@@ -233,7 +233,7 @@ int __must_check try_grab_page(struct page *page, unsigned int flags)
return -EREMOTEIO;
if (flags & FOLL_GET)
- folio_ref_inc(folio);
+ folio_ref_add(folio, refs);
else if (flags & FOLL_PIN) {
/*
* Don't take a pin on the zero page - it's not going anywhere
@@ -248,13 +248,13 @@ int __must_check try_grab_page(struct page *page, unsigned int flags)
* so that the page really is pinned.
*/
if (folio_test_large(folio)) {
- folio_ref_add(folio, 1);
- atomic_add(1, &folio->_pincount);
+ folio_ref_add(folio, refs);
+ atomic_add(refs, &folio->_pincount);
} else {
- folio_ref_add(folio, GUP_PIN_COUNTING_BIAS);
+ folio_ref_add(folio, refs * GUP_PIN_COUNTING_BIAS);
}
- node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1);
+ node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
}
return 0;
@@ -729,7 +729,7 @@ static struct page *follow_huge_pud(struct vm_area_struct *vma,
gup_must_unshare(vma, flags, page))
return ERR_PTR(-EMLINK);
- ret = try_grab_page(page, flags);
+ ret = try_grab_page(page, 1, flags);
if (ret)
page = ERR_PTR(ret);
else
@@ -806,7 +806,7 @@ static struct page *follow_huge_pmd(struct vm_area_struct *vma,
VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
!PageAnonExclusive(page), page);
- ret = try_grab_page(page, flags);
+ ret = try_grab_page(page, 1, flags);
if (ret)
return ERR_PTR(ret);
@@ -969,7 +969,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
!PageAnonExclusive(page), page);
/* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */
- ret = try_grab_page(page, flags);
+ ret = try_grab_page(page, 1, flags);
if (unlikely(ret)) {
page = ERR_PTR(ret);
goto out;
@@ -1233,7 +1233,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
goto unmap;
*page = pte_page(entry);
}
- ret = try_grab_page(*page, gup_flags);
+ ret = try_grab_page(*page, 1, gup_flags);
if (unlikely(ret))
goto unmap;
out:
@@ -1636,22 +1636,20 @@ static long __get_user_pages(struct mm_struct *mm,
* pages.
*/
if (page_increm > 1) {
- struct folio *folio;
/*
* Since we already hold refcount on the
* large folio, this should never fail.
*/
- folio = try_grab_folio(page, page_increm - 1,
+ ret = try_grab_page(page, page_increm - 1,
foll_flags);
- if (WARN_ON_ONCE(!folio)) {
+ if (WARN_ON_ONCE(ret)) {
/*
* Release the 1st page ref if the
* folio is problematic, fail hard.
*/
gup_put_folio(page_folio(page), 1,
foll_flags);
- ret = -EFAULT;
goto out;
}
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 425374a..18604e4 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1332,7 +1332,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
if (!*pgmap)
return ERR_PTR(-EFAULT);
page = pfn_to_page(pfn);
- ret = try_grab_page(page, flags);
+ ret = try_grab_page(page, 1, flags);
if (ret)
page = ERR_PTR(ret);
diff --git a/mm/internal.h b/mm/internal.h
index 2ea9a88..5305bbf 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1227,7 +1227,7 @@ int migrate_device_coherent_page(struct page *page);
* mm/gup.c
*/
struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
-int __must_check try_grab_page(struct page *page, unsigned int flags);
+int __must_check try_grab_page(struct page *page, int refs, unsigned int flags);
/*
* mm/huge_memory.c
--
2.7.4