From: Colin Ian King <colin.king(a)canonical.com>
More recent libc implementations are now using openat/openat2 system
calls so also add do_sys_openat2 to the tracing so that the test
passes on these systems because do_sys_open may not be called.
Thanks to Masami Hiramatsu for the help on getting this fix to work
correctly.
Signed-off-by: Colin Ian King <colin.king(a)canonical.com>
---
V2: write myevent2 using >> rather than >, also enable and disable
myevent2
---
.../selftests/ftrace/test.d/kprobe/kprobe_args_user.tc | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc
index a30a9c07290d..d25d01a19778 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc
@@ -9,12 +9,16 @@ grep -A10 "fetcharg:" README | grep -q '\[u\]<offset>' || exit_unsupported
:;: "user-memory access syntax and ustring working on user memory";:
echo 'p:myevent do_sys_open path=+0($arg2):ustring path2=+u0($arg2):string' \
> kprobe_events
+echo 'p:myevent2 do_sys_openat2 path=+0($arg2):ustring path2=+u0($arg2):string' \
+ >> kprobe_events
grep myevent kprobe_events | \
grep -q 'path=+0($arg2):ustring path2=+u0($arg2):string'
echo 1 > events/kprobes/myevent/enable
+echo 1 > events/kprobes/myevent2/enable
echo > /dev/null
echo 0 > events/kprobes/myevent/enable
+echo 0 > events/kprobes/myevent2/enable
grep myevent trace | grep -q 'path="/dev/null" path2="/dev/null"'
--
2.27.0
Implementation of support for parameterized testing in KUnit.
This approach requires the creation of a test case using the
KUNIT_CASE_PARAM macro that accepts a generator function as input.
This generator function should return the next parameter given the
previous parameter in parameterized tests. It also provides
a macro to generate common-case generators.
Signed-off-by: Arpitha Raghunandan <98.arpi(a)gmail.com>
Co-developed-by: Marco Elver <elver(a)google.com>
Signed-off-by: Marco Elver <elver(a)google.com>
---
Changes v2->v3:
- Modification of generator macro and method
Changes v1->v2:
- Use of a generator method to access test case parameters
include/kunit/test.h | 32 ++++++++++++++++++++++++++++++++
lib/kunit/test.c | 20 +++++++++++++++++++-
2 files changed, 51 insertions(+), 1 deletion(-)
diff --git a/include/kunit/test.h b/include/kunit/test.h
index a423fffefea0..16bf9f334e2c 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -142,6 +142,12 @@ struct kunit_case {
void (*run_case)(struct kunit *test);
const char *name;
+ /*
+ * Pointer to test parameter generator function.
+ * Used only for parameterized tests.
+ */
+ void* (*generate_params)(void *prev);
+
/* private: internal use only. */
bool success;
char *log;
@@ -162,6 +168,9 @@ static inline char *kunit_status_to_string(bool status)
* &struct kunit_case for an example on how to use it.
*/
#define KUNIT_CASE(test_name) { .run_case = test_name, .name = #test_name }
+#define KUNIT_CASE_PARAM(test_name, gen_params) \
+ { .run_case = test_name, .name = #test_name, \
+ .generate_params = gen_params }
/**
* struct kunit_suite - describes a related collection of &struct kunit_case
@@ -208,6 +217,15 @@ struct kunit {
const char *name; /* Read only after initialization! */
char *log; /* Points at case log after initialization */
struct kunit_try_catch try_catch;
+ /* param_values points to test case parameters in parameterized tests */
+ void *param_values;
+ /*
+ * current_param stores the index of the parameter in
+ * the array of parameters in parameterized tests.
+ * current_param + 1 is printed to indicate the parameter
+ * that causes the test to fail in case of test failure.
+ */
+ int current_param;
/*
* success starts as true, and may only be set to false during a
* test case; thus, it is safe to update this across multiple
@@ -1742,4 +1760,18 @@ do { \
fmt, \
##__VA_ARGS__)
+/**
+ * KUNIT_PARAM_GENERATOR() - Helper method for test parameter generators
+ * required in parameterized tests.
+ * @name: prefix of the name for the test parameter generator function.
+ * @prev: a pointer to the previous test parameter, NULL for first parameter.
+ * @array: a user-supplied pointer to an array of test parameters.
+ */
+#define KUNIT_PARAM_GENERATOR(name, array) \
+ static void *name##_gen_params(void *prev) \
+ { \
+ typeof((array)[0]) * __next = prev ? ((typeof(__next)) prev) + 1 : (array); \
+ return __next - (array) < ARRAY_SIZE((array)) ? __next : NULL; \
+ }
+
#endif /* _KUNIT_TEST_H */
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index 750704abe89a..b70ab9b12f3b 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -127,6 +127,11 @@ unsigned int kunit_test_case_num(struct kunit_suite *suite,
}
EXPORT_SYMBOL_GPL(kunit_test_case_num);
+static void kunit_print_failed_param(struct kunit *test)
+{
+ kunit_err(test, "\n\tTest failed at parameter: %d\n", test->current_param + 1);
+}
+
static void kunit_print_string_stream(struct kunit *test,
struct string_stream *stream)
{
@@ -168,6 +173,8 @@ static void kunit_fail(struct kunit *test, struct kunit_assert *assert)
assert->format(assert, stream);
kunit_print_string_stream(test, stream);
+ if (test->param_values)
+ kunit_print_failed_param(test);
WARN_ON(string_stream_destroy(stream));
}
@@ -239,7 +246,18 @@ static void kunit_run_case_internal(struct kunit *test,
}
}
- test_case->run_case(test);
+ if (!test_case->generate_params) {
+ test_case->run_case(test);
+ } else {
+ test->param_values = test_case->generate_params(NULL);
+ test->current_param = 0;
+
+ while (test->param_values) {
+ test_case->run_case(test);
+ test->param_values = test_case->generate_params(test->param_values);
+ test->current_param++;
+ }
+ }
}
static void kunit_case_internal_cleanup(struct kunit *test)
--
2.25.1
From: Ira Weiny <ira.weiny(a)intel.com>
Changes from RFC V3[3]
Rebase to TIP master
Update test error output
Standardize on 'irq_state' for state variables
From Dave Hansen
Update commit messages
Add/clean up comments
Add X86_FEATURE_PKS to disabled-features.h and remove some
explicit CONFIG checks
Move saved_pkrs member of thread_struct
Remove superfluous preempt_disable()
s/irq_save_pks/irq_save_set_pks/
Ensure PKRS is not seen in faults if not configured or not
supported
s/pks_mknoaccess/pks_mk_noaccess/
s/pks_mkread/pks_mk_readonly/
s/pks_mkrdwr/pks_mk_readwrite/
Change pks_key_alloc return to -EOPNOTSUPP when not supported
From Peter Zijlstra
Clean up Attribution
Remove superfluous preempt_disable()
Add union to differentiate exit_rcu/lockdep use in
irqentry_state_t
From Thomas Gleixner
Add preliminary clean up patch and adjust series as needed
Introduce a new page protection mechanism for supervisor pages, Protection Key
Supervisor (PKS).
2 use cases for PKS are being developed, trusted keys and PMEM. Trusted keys
is a newer use case which is still being explored. PMEM was submitted as part
of the RFC (v2) series[1]. However, since then it was found that some callers
of kmap() require a global implementation of PKS. Specifically some users of
kmap() expect mappings to be available to all kernel threads. While global use
of PKS is rare it needs to be included for correctness. Unfortunately the
kmap() updates required a large patch series to make the needed changes at the
various kmap() call sites so that patch set has been split out. Because the
global PKS feature is only required for that use case it will be deferred to
that set as well.[2] This patch set is being submitted as a precursor to both
of the use cases.
For an overview of the entire PKS ecosystem, a git tree including this series
and 2 proposed use cases can be found here:
https://lore.kernel.org/lkml/20201009195033.3208459-1-ira.weiny@intel.com/
https://lore.kernel.org/lkml/20201009201410.3209180-1-ira.weiny@intel.com/
PKS enables protections on 'domains' of supervisor pages to limit supervisor
mode access to those pages beyond the normal paging protections. PKS works in
a similar fashion to user space pkeys, PKU. As with PKU, supervisor pkeys are
checked in addition to normal paging protections and Access or Writes can be
disabled via a MSR update without TLB flushes when permissions change. Also
like PKU, a page mapping is assigned to a domain by setting pkey bits in the
page table entry for that mapping.
Access is controlled through a PKRS register which is updated via WRMSR/RDMSR.
XSAVE is not supported for the PKRS MSR. Therefore the implementation
saves/restores the MSR across context switches and during exceptions. Nested
exceptions are supported by each exception getting a new PKS state.
For consistent behavior with current paging protections, pkey 0 is reserved and
configured to allow full access via the pkey mechanism, thus preserving the
default paging protections on mappings with the default pkey value of 0.
Other keys, (1-15) are allocated by an allocator which prepares us for key
contention from day one. Kernel users should be prepared for the allocator to
fail either because of key exhaustion or due to PKS not being supported on the
arch and/or CPU instance.
The following are key attributes of PKS.
1) Fast switching of permissions
1a) Prevents access without page table manipulations
1b) No TLB flushes required
2) Works on a per thread basis
PKS is available with 4 and 5 level paging. Like PKRU it consumes 4 bits from
the PTE to store the pkey within the entry.
[1] https://lore.kernel.org/lkml/20200717072056.73134-1-ira.weiny@intel.com/
[2] https://lore.kernel.org/lkml/20201009195033.3208459-2-ira.weiny@intel.com/
[3] https://lore.kernel.org/lkml/20201009194258.3207172-1-ira.weiny@intel.com/
Fenghua Yu (2):
x86/pks: Enable Protection Keys Supervisor (PKS)
x86/pks: Add PKS kernel API
Ira Weiny (7):
x86/pkeys: Create pkeys_common.h
x86/fpu: Refactor arch_set_user_pkey_access() for PKS support
x86/pks: Preserve the PKRS MSR on context switch
x86/entry: Pass irqentry_state_t by reference
x86/entry: Preserve PKRS MSR across exceptions
x86/fault: Report the PKRS state on fault
x86/pks: Add PKS test code
Thomas Gleixner (1):
x86/entry: Move nmi entry/exit into common code
Documentation/core-api/protection-keys.rst | 102 ++-
arch/x86/Kconfig | 1 +
arch/x86/entry/common.c | 65 +-
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/disabled-features.h | 8 +-
arch/x86/include/asm/idtentry.h | 28 +-
arch/x86/include/asm/msr-index.h | 1 +
arch/x86/include/asm/pgtable.h | 13 +-
arch/x86/include/asm/pgtable_types.h | 12 +
arch/x86/include/asm/pkeys.h | 15 +
arch/x86/include/asm/pkeys_common.h | 40 ++
arch/x86/include/asm/processor.h | 14 +
arch/x86/include/uapi/asm/processor-flags.h | 2 +
arch/x86/kernel/cpu/common.c | 15 +
arch/x86/kernel/cpu/mce/core.c | 6 +-
arch/x86/kernel/fpu/xstate.c | 22 +-
arch/x86/kernel/kvm.c | 6 +-
arch/x86/kernel/nmi.c | 6 +-
arch/x86/kernel/process.c | 26 +
arch/x86/kernel/traps.c | 24 +-
arch/x86/mm/fault.c | 87 ++-
arch/x86/mm/pkeys.c | 191 +++++-
include/linux/entry-common.h | 46 +-
include/linux/pgtable.h | 4 +
include/linux/pkeys.h | 22 +
kernel/entry/common.c | 62 +-
lib/Kconfig.debug | 12 +
lib/Makefile | 3 +
lib/pks/Makefile | 3 +
lib/pks/pks_test.c | 691 ++++++++++++++++++++
mm/Kconfig | 2 +
tools/testing/selftests/x86/Makefile | 3 +-
tools/testing/selftests/x86/test_pks.c | 66 ++
33 files changed, 1441 insertions(+), 158 deletions(-)
create mode 100644 arch/x86/include/asm/pkeys_common.h
create mode 100644 lib/pks/Makefile
create mode 100644 lib/pks/pks_test.c
create mode 100644 tools/testing/selftests/x86/test_pks.c
--
2.28.0.rc0.12.gb6a658bd00c9
From: Mike Rapoport <rppt(a)linux.ibm.com>
Hi,
This is an implementation of "secret" mappings backed by a file descriptor.
The file descriptor backing secret memory mappings is created using a
dedicated memfd_secret system call. The desired protection mode for the
memory is configured using flags parameter of the system call. The mmap()
of the file descriptor created with memfd_secret() will create a "secret"
memory mapping. The pages in that mapping will be marked as not present in
the direct map and will have desired protection bits set in the user page
table. For instance, current implementation allows uncached mappings.
Although normally Linux userspace mappings are protected from other users,
such secret mappings are useful for environments where a hostile tenant is
trying to trick the kernel into giving them access to other tenants
mappings.
Additionally, in the future the secret mappings may be used as a means to
protect guest memory in a virtual machine host.
For demonstration of secret memory usage we've created a userspace library
https://git.kernel.org/pub/scm/linux/kernel/git/jejb/secret-memory-preloade…
that does two things: the first is act as a preloader for openssl to
redirect all the OPENSSL_malloc calls to secret memory meaning any secret
keys get automatically protected this way and the other thing it does is
expose the API to the user who needs it. We anticipate that a lot of the
use cases would be like the openssl one: many toolkits that deal with
secret keys already have special handling for the memory to try to give
them greater protection, so this would simply be pluggable into the
toolkits without any need for user application modification.
Hiding secret memory mappings behind an anonymous file allows (ab)use of
the page cache for tracking pages allocated for the "secret" mappings as
well as using address_space_operations for e.g. page migration callbacks.
The anonymous file may be also used implicitly, like hugetlb files, to
implement mmap(MAP_SECRET) and use the secret memory areas with "native" mm
ABIs in the future.
To limit fragmentation of the direct map to splitting only PUD-size pages,
I've added an amortizing cache of PMD-size pages to each file descriptor
that is used as an allocation pool for the secret memory areas.
It is easy to add boot time reservation of the memory for secretmem
needs. There was an implementation in earlier version of this set, but I've
dropped it for now as there is no consensus whether the boot time
reservation should be done from memblock or from CMA. I believe we can have
this discussion after straightening out the basic implementation.
v7:
* Use set_direct_map() instead of __kernel_map_pages() to ensure error
handling in case the direct map update fails
* Add accounting of large pages used to reduce the direct map fragmentation
* Teach get_user_pages() and friends to refuse get/pin secretmem pages
v6: https://lore.kernel.org/lkml/20200924132904.1391-1-rppt@kernel.org
* Silence the warning about missing syscall, thanks to Qian Cai
* Replace spaces with tabs in Kconfig additions, per Randy
* Add a selftest.
v5: https://lore.kernel.org/lkml/20200916073539.3552-1-rppt@kernel.org
* rebase on v5.9-rc5
* drop boot time memory reservation patch
v4: https://lore.kernel.org/lkml/20200818141554.13945-1-rppt@kernel.org
* rebase on v5.9-rc1
* Do not redefine PMD_PAGE_ORDER in fs/dax.c, thanks Kirill
* Make secret mappings exclusive by default and only require flags to
memfd_secret() system call for uncached mappings, thanks again Kirill :)
v3: https://lore.kernel.org/lkml/20200804095035.18778-1-rppt@kernel.org
* Squash kernel-parameters.txt update into the commit that added the
command line option.
* Make uncached mode explicitly selectable by architectures. For now enable
it only on x86.
v2: https://lore.kernel.org/lkml/20200727162935.31714-1-rppt@kernel.org
* Follow Michael's suggestion and name the new system call 'memfd_secret'
* Add kernel-parameters documentation about the boot option
* Fix i386-tinyconfig regression reported by the kbuild bot.
CONFIG_SECRETMEM now depends on !EMBEDDED to disable it on small systems
from one side and still make it available unconditionally on
architectures that support SET_DIRECT_MAP.
v1: https://lore.kernel.org/lkml/20200720092435.17469-1-rppt@kernel.org
Mike Rapoport (8):
mm: add definition of PMD_PAGE_ORDER
mmap: make mlock_future_check() global
set_memory: allow set_direct_map_*_noflush() for multiple pages
mm: introduce memfd_secret system call to create "secret" memory areas
arch, mm: wire up memfd_secret system call were relevant
mm: secretmem: use PMD-size pages to amortize direct map fragmentation
secretmem: test: add basic selftest for memfd_secret(2)
mm: secretmem: add ability to reserve memory at boot
arch/Kconfig | 7 +
arch/arm64/include/asm/cacheflush.h | 4 +-
arch/arm64/include/asm/unistd.h | 2 +-
arch/arm64/include/asm/unistd32.h | 2 +
arch/arm64/include/uapi/asm/unistd.h | 1 +
arch/arm64/mm/pageattr.c | 10 +-
arch/riscv/include/asm/set_memory.h | 4 +-
arch/riscv/include/asm/unistd.h | 1 +
arch/riscv/mm/pageattr.c | 8 +-
arch/x86/Kconfig | 1 +
arch/x86/entry/syscalls/syscall_32.tbl | 1 +
arch/x86/entry/syscalls/syscall_64.tbl | 1 +
arch/x86/include/asm/set_memory.h | 4 +-
arch/x86/mm/pat/set_memory.c | 8 +-
fs/dax.c | 11 +-
include/linux/pgtable.h | 3 +
include/linux/set_memory.h | 4 +-
include/linux/syscalls.h | 1 +
include/uapi/asm-generic/unistd.h | 7 +-
include/uapi/linux/magic.h | 1 +
include/uapi/linux/secretmem.h | 8 +
kernel/sys_ni.c | 2 +
mm/Kconfig | 4 +
mm/Makefile | 1 +
mm/gup.c | 10 +
mm/internal.h | 3 +
mm/mmap.c | 5 +-
mm/secretmem.c | 487 ++++++++++++++++++++++
mm/vmalloc.c | 5 +-
scripts/checksyscalls.sh | 4 +
tools/testing/selftests/vm/.gitignore | 1 +
tools/testing/selftests/vm/Makefile | 3 +-
tools/testing/selftests/vm/memfd_secret.c | 296 +++++++++++++
tools/testing/selftests/vm/run_vmtests | 17 +
34 files changed, 892 insertions(+), 35 deletions(-)
create mode 100644 include/uapi/linux/secretmem.h
create mode 100644 mm/secretmem.c
create mode 100644 tools/testing/selftests/vm/memfd_secret.c
--
2.28.0
From: SeongJae Park <sjpark(a)amazon.de>
Because commit d43c7fb05765 ("kunit: tool: fix improper treatment of
file location") removed 'kunit_kernel.kunitconfig_path' modification for
the '--build_dir' argument, running kunit with '--build_dir' now fails
with below error message:
Traceback (most recent call last):
File "./tools/testing/kunit/kunit.py", line 325, in <module>
main(sys.argv[1:])
File "./tools/testing/kunit/kunit.py", line 245, in main
linux = kunit_kernel.LinuxSourceTree()
File "/home/sjpark/linux/tools/testing/kunit/kunit_kernel.py", line 109, in __init__
self._kconfig.read_from_file(kunitconfig_path)
File "/home/sjpark/linux/tools/testing/kunit/kunit_config.py", line 88, in read_from_file
with open(path, 'r') as f:
FileNotFoundError: [Errno 2] No such file or directory: '.kunitconfig'
As simply reverting the change would now make the 'kunit_tool_test' fail
again, this commit fixes the problem by passing the 'build_dir' argument
to 'LinuxSourceTree' constructor.
Fixes: d43c7fb05765 ("kunit: tool: fix improper treatment of file location")
Signed-off-by: SeongJae Park <sjpark(a)amazon.de>
---
tools/testing/kunit/kunit.py | 8 ++++----
tools/testing/kunit/kunit_kernel.py | 4 ++--
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 425ef40067e7..611c23e178f8 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -242,7 +242,7 @@ def main(argv, linux=None):
os.mkdir(cli_args.build_dir)
if not linux:
- linux = kunit_kernel.LinuxSourceTree()
+ linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir)
request = KunitRequest(cli_args.raw_output,
cli_args.timeout,
@@ -259,7 +259,7 @@ def main(argv, linux=None):
os.mkdir(cli_args.build_dir)
if not linux:
- linux = kunit_kernel.LinuxSourceTree()
+ linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir)
request = KunitConfigRequest(cli_args.build_dir,
cli_args.make_options)
@@ -275,7 +275,7 @@ def main(argv, linux=None):
os.mkdir(cli_args.build_dir)
if not linux:
- linux = kunit_kernel.LinuxSourceTree()
+ linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir)
request = KunitBuildRequest(cli_args.jobs,
cli_args.build_dir,
@@ -293,7 +293,7 @@ def main(argv, linux=None):
os.mkdir(cli_args.build_dir)
if not linux:
- linux = kunit_kernel.LinuxSourceTree()
+ linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir)
exec_request = KunitExecRequest(cli_args.timeout,
cli_args.build_dir,
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index e20e2056cb38..16a997504317 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -104,9 +104,9 @@ def get_kconfig_path(build_dir):
class LinuxSourceTree(object):
"""Represents a Linux kernel source tree with KUnit tests."""
- def __init__(self):
+ def __init__(self, build_dir):
self._kconfig = kunit_config.Kconfig()
- self._kconfig.read_from_file(kunitconfig_path)
+ self._kconfig.read_from_file(os.path.join(build_dir, kunitconfig_path))
self._ops = LinuxSourceTreeOperations()
signal.signal(signal.SIGINT, self.signal_handler)
--
2.17.1
From: Yonghong Song <yhs(a)fb.com>
[ Upstream commit 6e057fc15a2da4ee03eb1fa6889cf687e690106e ]
When tweaking llvm optimizations, I found that selftest build failed
with the following error:
libbpf: elf: skipping unrecognized data section(6) .rodata.str1.1
libbpf: prog 'sysctl_tcp_mem': bad map relo against '.L__const.is_tcp_mem.tcp_mem_name'
in section '.rodata.str1.1'
Error: failed to open BPF object file: Relocation failed
make: *** [/work/net-next/tools/testing/selftests/bpf/test_sysctl_prog.skel.h] Error 255
make: *** Deleting file `/work/net-next/tools/testing/selftests/bpf/test_sysctl_prog.skel.h'
The local string constant "tcp_mem_name" is put into '.rodata.str1.1' section
which libbpf cannot handle. Using untweaked upstream llvm, "tcp_mem_name"
is completely inlined after loop unrolling.
Commit 7fb5eefd7639 ("selftests/bpf: Fix test_sysctl_loop{1, 2}
failure due to clang change") solved a similar problem by defining
the string const as a global. Let us do the same here
for test_sysctl_prog.c so it can weather future potential llvm changes.
Signed-off-by: Yonghong Song <yhs(a)fb.com>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Acked-by: Andrii Nakryiko <andriin(a)fb.com>
Link: https://lore.kernel.org/bpf/20200910202718.956042-1-yhs@fb.com
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
tools/testing/selftests/bpf/progs/test_sysctl_prog.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
index 5cbbff416998c..4396faf33394a 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -19,11 +19,11 @@
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
+const char tcp_mem_name[] = "net/ipv4/tcp_mem";
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
- char tcp_mem_name[] = "net/ipv4/tcp_mem";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
--
2.25.1
From: Yonghong Song <yhs(a)fb.com>
[ Upstream commit 6e057fc15a2da4ee03eb1fa6889cf687e690106e ]
When tweaking llvm optimizations, I found that selftest build failed
with the following error:
libbpf: elf: skipping unrecognized data section(6) .rodata.str1.1
libbpf: prog 'sysctl_tcp_mem': bad map relo against '.L__const.is_tcp_mem.tcp_mem_name'
in section '.rodata.str1.1'
Error: failed to open BPF object file: Relocation failed
make: *** [/work/net-next/tools/testing/selftests/bpf/test_sysctl_prog.skel.h] Error 255
make: *** Deleting file `/work/net-next/tools/testing/selftests/bpf/test_sysctl_prog.skel.h'
The local string constant "tcp_mem_name" is put into '.rodata.str1.1' section
which libbpf cannot handle. Using untweaked upstream llvm, "tcp_mem_name"
is completely inlined after loop unrolling.
Commit 7fb5eefd7639 ("selftests/bpf: Fix test_sysctl_loop{1, 2}
failure due to clang change") solved a similar problem by defining
the string const as a global. Let us do the same here
for test_sysctl_prog.c so it can weather future potential llvm changes.
Signed-off-by: Yonghong Song <yhs(a)fb.com>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Acked-by: Andrii Nakryiko <andriin(a)fb.com>
Link: https://lore.kernel.org/bpf/20200910202718.956042-1-yhs@fb.com
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
tools/testing/selftests/bpf/progs/test_sysctl_prog.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
index 50525235380e8..5489823c83fc2 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -19,11 +19,11 @@
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
+const char tcp_mem_name[] = "net/ipv4/tcp_mem";
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
- char tcp_mem_name[] = "net/ipv4/tcp_mem";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
--
2.25.1
From: Yonghong Song <yhs(a)fb.com>
[ Upstream commit 6e057fc15a2da4ee03eb1fa6889cf687e690106e ]
When tweaking llvm optimizations, I found that selftest build failed
with the following error:
libbpf: elf: skipping unrecognized data section(6) .rodata.str1.1
libbpf: prog 'sysctl_tcp_mem': bad map relo against '.L__const.is_tcp_mem.tcp_mem_name'
in section '.rodata.str1.1'
Error: failed to open BPF object file: Relocation failed
make: *** [/work/net-next/tools/testing/selftests/bpf/test_sysctl_prog.skel.h] Error 255
make: *** Deleting file `/work/net-next/tools/testing/selftests/bpf/test_sysctl_prog.skel.h'
The local string constant "tcp_mem_name" is put into '.rodata.str1.1' section
which libbpf cannot handle. Using untweaked upstream llvm, "tcp_mem_name"
is completely inlined after loop unrolling.
Commit 7fb5eefd7639 ("selftests/bpf: Fix test_sysctl_loop{1, 2}
failure due to clang change") solved a similar problem by defining
the string const as a global. Let us do the same here
for test_sysctl_prog.c so it can weather future potential llvm changes.
Signed-off-by: Yonghong Song <yhs(a)fb.com>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Acked-by: Andrii Nakryiko <andriin(a)fb.com>
Link: https://lore.kernel.org/bpf/20200910202718.956042-1-yhs@fb.com
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
tools/testing/selftests/bpf/progs/test_sysctl_prog.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
index 50525235380e8..5489823c83fc2 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -19,11 +19,11 @@
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
+const char tcp_mem_name[] = "net/ipv4/tcp_mem";
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
- char tcp_mem_name[] = "net/ipv4/tcp_mem";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
--
2.25.1
Due to the raw_output() function on kunit_parser.py actually being a
generator, it only runs if something reads the lines it returns. Since
we no longer do that (parsing doesn't actually happen if raw_output is
enabled), it was not printing anything.
Fixes: 45ba7a893ad89114e773b3dc32f6431354c465d6 ("kunit: kunit_tool: Separate out config/build/exec/parse")
Signed-off-by: David Gow <davidgow(a)google.com>
---
tools/testing/kunit/kunit_parser.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index 8019e3dd4c32..744ee9cb0073 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -66,7 +66,6 @@ def isolate_kunit_output(kernel_output):
def raw_output(kernel_output):
for line in kernel_output:
print(line)
- yield line
DIVIDER = '=' * 60
--
2.29.0.rc1.297.gfa9743e501-goog
As we're close to the end of the merge window for Kernel 5.10,
this series contains the patches from the past two documentation
fix series I sent during the merge window and that required more
work.
It is based on the top of upstream. The full series with the patches
that either didn't generate any reply or have been acked is on
this branch:
https://git.linuxtv.org/mchehab/experimental.git/log/?h=docs_for_v5.10
There are a couple of warnings that aren't addressed here, because
they don't show at linux-next. I'm keeping a second patch series
against next-20201021 fixing additional warnings caused by patches
pending merges.
I'll be posting those separately.
Regards,
Mauro
Mauro Carvalho Chehab (6):
drm: amdgpu: kernel-doc: update some adev parameters
docs: lockdep-design: fix some warning issues
locking/refcount: move kernel-doc markups to the proper place
IB/srpt: docs: add a description for cq_size member
kunit: test: fix remaining kernel-doc warnings
docs: fs: api-summary.rst: get rid of kernel-doc include
Documentation/filesystems/api-summary.rst | 3 -
Documentation/locking/lockdep-design.rst | 51 +++---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 28 ++--
drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 6 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 7 +-
drivers/infiniband/ulp/srpt/ib_srpt.h | 1 +
include/kunit/test.h | 16 +-
include/linux/refcount.h | 158 +++++++++----------
8 files changed, 139 insertions(+), 131 deletions(-)
--
2.26.2
Hi,
Reposting for -rc1, with some fixes, and an additional patch at the end,
too. I've swept through and looked for problems, as well.
Changes since v2 [1]:
* Rebased onto 5.10-rc1
* Fixed an improper ".." include path, identified by Linus [2].
* Added an "if (cmd == DUMP_USER_PAGES_TEST)" guard to invoking
dump_pages_test(). Before, it worked, but it's too subtle to depend
merely on struct gup_test.which_pages[] being zeroed out, in order to
avoid dumping pages that are not requested to be dumped.
* Added a patch to the end: 2x speedup for run_vmtests.sh
* Tweaked some commit logs and comments slightly
Original cover letter, edited slightly:
Summary: This series provides two main things, and a number of smaller
supporting goodies. The two main points are:
1) Add a new sub-test to gup_test, which in turn is a renamed version of
gup_benchmark. This sub-test allows nicer testing of dump_pages(), at
least on user-space pages.
For quite a while, I was doing a quick hack to gup_test.c whenever I
wanted to try out changes to dump_page(). Then Matthew Wilcox asked me
what I meant when I said "I used my dump_page() unit test", and I
realized that it might be nice to check in a polished up version of
that.
Details about how it works and how to use it are in the commit
description for patch #6 ("selftests/vm: gup_test: introduce the
dump_pages() sub-test").
2) Fixes a limitation of hmm-tests: these tests are incredibly useful,
but only if people actually build and run them. And it turns out that
libhugetlbfs is a little too effective at throwing a wrench in the
works, there. So I've added a little configuration check that removes
just two of the 21 hmm-tests, if libhugetlbfs is not available.
Further details in the commit description of patch #8 ("selftests/vm:
hmm-tests: remove the libhugetlbfs dependency").
Other smaller things that this series does:
a) Remove code duplication by creating gup_test.h.
b) Clear up the sub-test organization, and their invocation within
run_vmtests.sh.
c) Other minor assorted improvements.
[1] v2 is here:
https://lore.kernel.org/linux-doc/20200929212747.251804-1-jhubbard@nvidia.c…
[2] https://lore.kernel.org/r/CAHk-=wgh-TMPHLY3jueHX7Y2fWh3D+nMBqVS__AZm6-oorqu…
John Hubbard (9):
mm/gup_benchmark: rename to mm/gup_test
selftests/vm: use a common gup_test.h
selftests/vm: rename run_vmtests --> run_vmtests.sh
selftests/vm: minor cleanup: Makefile and gup_test.c
selftests/vm: only some gup_test items are really benchmarks
selftests/vm: gup_test: introduce the dump_pages() sub-test
selftests/vm: run_vmtests.sh: update and clean up gup_test invocation
selftests/vm: hmm-tests: remove the libhugetlbfs dependency
selftests/vm: 2x speedup for run_vmtests.sh
Documentation/core-api/pin_user_pages.rst | 6 +-
arch/s390/configs/debug_defconfig | 2 +-
arch/s390/configs/defconfig | 2 +-
mm/Kconfig | 21 +-
mm/Makefile | 2 +-
mm/{gup_benchmark.c => gup_test.c} | 111 ++++++----
mm/gup_test.h | 32 +++
tools/testing/selftests/vm/.gitignore | 3 +-
tools/testing/selftests/vm/Makefile | 38 +++-
tools/testing/selftests/vm/check_config.sh | 31 +++
tools/testing/selftests/vm/config | 2 +-
tools/testing/selftests/vm/gup_benchmark.c | 143 -------------
tools/testing/selftests/vm/gup_test.c | 194 ++++++++++++++++++
tools/testing/selftests/vm/hmm-tests.c | 10 +-
.../vm/{run_vmtests => run_vmtests.sh} | 32 ++-
15 files changed, 416 insertions(+), 213 deletions(-)
rename mm/{gup_benchmark.c => gup_test.c} (60%)
create mode 100644 mm/gup_test.h
create mode 100644 tools/testing/selftests/vm/check_config.sh
delete mode 100644 tools/testing/selftests/vm/gup_benchmark.c
create mode 100644 tools/testing/selftests/vm/gup_test.c
rename tools/testing/selftests/vm/{run_vmtests => run_vmtests.sh} (88%)
base-commit: 3650b228f83adda7e5ee532e2b90429c03f7b9ec
--
2.29.0
The eeh-basic test got its own 60 seconds timeout (defined in commit
414f50434aa2 "selftests/eeh: Bump EEH wait time to 60s") per breakable
device.
And we have discovered that the number of breakable devices varies
on different hardware. The device recovery time ranges from 0 to 35
seconds. In our test pool it will take about 30 seconds to run on a
Power8 system with 5 breakable devices, 60 seconds to run on a
Power9 system with 4 breakable devices.
Thus it's better to disable the default 45 seconds timeout setting in
the kselftest framework to give it a chance to finish. And let the
test take care of the timeout control.
Signed-off-by: Po-Hsu Lin <po-hsu.lin(a)canonical.com>
---
tools/testing/selftests/powerpc/eeh/Makefile | 2 +-
tools/testing/selftests/powerpc/eeh/settings | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/powerpc/eeh/settings
diff --git a/tools/testing/selftests/powerpc/eeh/Makefile b/tools/testing/selftests/powerpc/eeh/Makefile
index b397bab..ae963eb 100644
--- a/tools/testing/selftests/powerpc/eeh/Makefile
+++ b/tools/testing/selftests/powerpc/eeh/Makefile
@@ -3,7 +3,7 @@ noarg:
$(MAKE) -C ../
TEST_PROGS := eeh-basic.sh
-TEST_FILES := eeh-functions.sh
+TEST_FILES := eeh-functions.sh settings
top_srcdir = ../../../../..
include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/eeh/settings b/tools/testing/selftests/powerpc/eeh/settings
new file mode 100644
index 0000000..e7b9417
--- /dev/null
+++ b/tools/testing/selftests/powerpc/eeh/settings
@@ -0,0 +1 @@
+timeout=0
--
2.7.4
From: SeongJae Park <sjpark(a)amazon.de>
This patchset makes the kunit tool respect '.kunitconfig' under the
'--build_dir'.
Revision History
================
From v1
(https://lore.kernel.org/linux-kselftest/20201012102621.32226-2-sjpark@amazo…)
- Rebase on master branch of linus' tree (Not a clean rebase)
- Add 'Reviewed-by: Brendan Higgins <brendanhiggins(a)google.com>' in
second patch
SeongJae Park (2):
kunit: tool: Respect '.kunitconfig' in 'build_dir'
kunit: tool: Mark 'kunittest_config' as constant again
tools/testing/kunit/kunit.py | 28 +++++++++++++++-------------
tools/testing/kunit/kunit_kernel.py | 6 +++---
2 files changed, 18 insertions(+), 16 deletions(-)
--
2.17.1
Recently, CONFIG_MPTCP_IPV6 no longer selects CONFIG_IPV6. As a
consequence, if CONFIG_MPTCP_IPV6=y is added to the kconfig, it will no
longer ensure CONFIG_IPV6=y. If it is not enabled, CONFIG_MPTCP_IPV6
will stay disabled and selftests will fail.
We also need CONFIG_IPV6 to be built-in. For more details, please see
commit 0ed37ac586c0 ("mptcp: depends on IPV6 but not as a module").
Note that 'make kselftest-merge' will take all 'config' files found in
'tools/testing/selftests'. Because some of them already set
CONFIG_IPV6=y, MPTCP selftests were still passing. But they will fail if
MPTCP selftests are launched manually after having executed this command
to prepare the kernel config:
./scripts/kconfig/merge_config.sh -m .config \
./tools/testing/selftests/net/mptcp/config
Fixes: 010b430d5df5 ("mptcp: MPTCP_IPV6 should depend on IPV6 instead of selecting it")
Signed-off-by: Matthieu Baerts <matthieu.baerts(a)tessares.net>
---
tools/testing/selftests/net/mptcp/config | 1 +
1 file changed, 1 insertion(+)
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 8df5cb8f71ff..741a1c4f4ae8 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -1,4 +1,5 @@
CONFIG_MPTCP=y
+CONFIG_IPV6=y
CONFIG_MPTCP_IPV6=y
CONFIG_INET_DIAG=m
CONFIG_INET_MPTCP_DIAG=m
--
2.27.0
From: SeongJae Park <sjpark(a)amazon.de>
If 'CONFIG_KUNIT=m', letting kunit tests that do not support loadable
module builds depend on 'KUNIT' instead of 'KUNIT=y' results in compile
errors. This commit updates the documentation accordingly.
Fixes: 9fe124bf1b77 ("kunit: allow kunit to be loaded as a module")
Signed-off-by: SeongJae Park <sjpark(a)amazon.de>
Reviewed-by: David Gow <davidgow(a)google.com>
---
Changes from v2
(https://lore.kernel.org/linux-kselftest/20201013063743.32179-1-sjpark@amazo…)
- Fix a grammar issue (David Gow)
- Add 'Reviewed-by: David Gow <davidgow(a)google.com>'
Changes from v1
(https://lore.kernel.org/linux-kselftest/20201012105420.5945-1-sjpark@amazon…)
- Fix a typo (Marco Elver)
---
Documentation/dev-tools/kunit/start.rst | 2 +-
Documentation/dev-tools/kunit/usage.rst | 5 +++++
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/Documentation/dev-tools/kunit/start.rst b/Documentation/dev-tools/kunit/start.rst
index d23385e3e159..454f307813ea 100644
--- a/Documentation/dev-tools/kunit/start.rst
+++ b/Documentation/dev-tools/kunit/start.rst
@@ -197,7 +197,7 @@ Now add the following to ``drivers/misc/Kconfig``:
config MISC_EXAMPLE_TEST
bool "Test for my example"
- depends on MISC_EXAMPLE && KUNIT
+ depends on MISC_EXAMPLE && KUNIT=y
and the following to ``drivers/misc/Makefile``:
diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst
index 961d3ea3ca19..62142a47488c 100644
--- a/Documentation/dev-tools/kunit/usage.rst
+++ b/Documentation/dev-tools/kunit/usage.rst
@@ -561,6 +561,11 @@ Once the kernel is built and installed, a simple
...will run the tests.
+.. note::
+ Note that you should make sure your test depends on ``KUNIT=y`` in Kconfig
+ if the test does not support module build. Otherwise, it will trigger
+ compile errors if ``CONFIG_KUNIT`` is ``m``.
+
Writing new tests for other architectures
-----------------------------------------
--
2.17.1
From: SeongJae Park <sjpark(a)amazon.de>
If 'CONFIG_KUNIT=m', letting kunit tests that do not support loadable
module builds depend on 'KUNIT' instead of 'KUNIT=y' results in compile
errors. This commit updates the documentation accordingly.
Fixes: 9fe124bf1b77 ("kunit: allow kunit to be loaded as a module")
Signed-off-by: SeongJae Park <sjpark(a)amazon.de>
---
Changes from v1
(https://lore.kernel.org/linux-kselftest/20201012105420.5945-1-sjpark@amazon…):
- Fix a typo (Marco Elver)
---
Documentation/dev-tools/kunit/start.rst | 2 +-
Documentation/dev-tools/kunit/usage.rst | 5 +++++
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/Documentation/dev-tools/kunit/start.rst b/Documentation/dev-tools/kunit/start.rst
index d23385e3e159..454f307813ea 100644
--- a/Documentation/dev-tools/kunit/start.rst
+++ b/Documentation/dev-tools/kunit/start.rst
@@ -197,7 +197,7 @@ Now add the following to ``drivers/misc/Kconfig``:
config MISC_EXAMPLE_TEST
bool "Test for my example"
- depends on MISC_EXAMPLE && KUNIT
+ depends on MISC_EXAMPLE && KUNIT=y
and the following to ``drivers/misc/Makefile``:
diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst
index 3c3fe8b5fecc..b331f5a5b0b9 100644
--- a/Documentation/dev-tools/kunit/usage.rst
+++ b/Documentation/dev-tools/kunit/usage.rst
@@ -556,6 +556,11 @@ Once the kernel is built and installed, a simple
...will run the tests.
+.. note::
+ Note that you should make your test depends on ``KUNIT=y`` in Kconfig if the
+ test does not support module build. Otherwise, it will trigger compile
+ errors if ``CONFIG_KUNIT`` is ``m``.
+
Writing new tests for other architectures
-----------------------------------------
--
2.17.1
This patchset adds support for the SRv6 End.DT4 behavior.
The SRv6 End.DT4 is used to implement multi-tenant IPv4 L3VPN. It decapsulates
the received packets and performs IPv4 routing lookup in the routing table of
the tenant. The SRv6 End.DT4 Linux implementation leverages a VRF device. SRv6
End.DT4 is defined in the SRv6 Network Programming [1].
- Patch 1/4 is needed to solve a pre-existing issue with tunneled packets
when a sniffer is attached;
- Patch 2/4 introduces two callbacks used for customizing the
creation/destruction of a SRv6 behavior;
- Patch 3/4 is the core patch that adds support for the SRv6 End.DT4 behavior;
- Patch 4/4 adds the selftest for SRv6 End.DT4.
I would like to thank David Ahern for his support during the development of
this patch set.
Comments, suggestions and improvements are very welcome!
Thanks,
Andrea Mayer
[1] https://tools.ietf.org/html/draft-ietf-spring-srv6-network-programming
Andrea Mayer (4):
vrf: push mac header for tunneled packets when sniffer is attached
seg6: add callbacks for customizing the creation/destruction of a
behavior
seg6: add support for the SRv6 End.DT4 behavior
add selftest for the SRv6 End.DT4 behavior
drivers/net/vrf.c | 78 ++-
net/ipv6/seg6_local.c | 261 ++++++++++
.../selftests/net/srv6_end_dt4_l3vpn_test.sh | 490 ++++++++++++++++++
3 files changed, 823 insertions(+), 6 deletions(-)
create mode 100755 tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
--
2.20.1
An empty test suite is an okay test suite.
Don't fail the rest of the test suites if one of them is empty.
Fixes: 6ebf5866f2e8 ("kunit: tool: add Python wrappers for running KUnit tests")
Signed-off-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
---
tools/testing/kunit/kunit_parser.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index 8019e3dd4c32..12b9fc652ef6 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -242,7 +242,7 @@ def parse_test_suite(lines: List[str], expected_suite_index: int) -> TestSuite:
return None
test_suite.name = name
expected_test_case_num = parse_subtest_plan(lines)
- if not expected_test_case_num:
+ if expected_test_case_num is None:
return None
while expected_test_case_num > 0:
test_case = parse_test_case(lines)
--
2.28.0
From: Ira Weiny <ira.weiny(a)intel.com>
This RFC series has been reviewed by Dave Hansen.
Introduce a new page protection mechanism for supervisor pages, Protection Key
Supervisor (PKS).
2 use cases for PKS are being developed, trusted keys and PMEM. Trusted keys
is a newer use case which is still being explored. PMEM was submitted as part
of the RFC (v2) series[1]. However, since then it was found that some callers
of kmap() require a global implementation of PKS. Specifically some users of
kmap() expect mappings to be available to all kernel threads. While global use
of PKS is rare it needs to be included for correctness. Unfortunately the
kmap() updates required a large patch series to make the needed changes at the
various kmap() call sites so that patch set has been split out. Because the
global PKS feature is only required for that use case it will be deferred to
that set as well.[2] This patch set is being submitted as a precursor to both
of the use cases.
For an overview of the entire PKS ecosystem, a git tree including this series
and the 2 use cases can be found here:
https://github.com/weiny2/linux-kernel/tree/pks-rfc-v3
PKS enables protections on 'domains' of supervisor pages to limit supervisor
mode access to those pages beyond the normal paging protections. PKS works in
a similar fashion to user space pkeys, PKU. As with PKU, supervisor pkeys are
checked in addition to normal paging protections, and access or writes can be
disabled via an MSR update without TLB flushes when permissions change. Also
like PKU, a page mapping is assigned to a domain by setting pkey bits in the
page table entry for that mapping.
Access is controlled through a PKRS register which is updated via WRMSR/RDMSR.
XSAVE is not supported for the PKRS MSR. Therefore the implementation
saves/restores the MSR across context switches and during exceptions. Nested
exceptions are supported by each exception getting a new PKS state.
For consistent behavior with current paging protections, pkey 0 is reserved and
configured to allow full access via the pkey mechanism, thus preserving the
default paging protections on mappings with the default pkey value of 0.
Other keys, (1-15) are allocated by an allocator which prepares us for key
contention from day one. Kernel users should be prepared for the allocator to
fail either because of key exhaustion or due to PKS not being supported on the
arch and/or CPU instance.
The following are key attributes of PKS.
1) Fast switching of permissions
1a) Prevents access without page table manipulations
1b) No TLB flushes required
2) Works on a per thread basis
PKS is available with 4 and 5 level paging. Like PKRU it consumes 4 bits from
the PTE to store the pkey within the entry.
[1] https://lore.kernel.org/lkml/20200717072056.73134-1-ira.weiny@intel.com/
[2] https://github.com/weiny2/linux-kernel/commit/f10abb0f0d7b4e14f03fc8890313a…
and a testing patch
https://github.com/weiny2/linux-kernel/commit/2a8e0fc7654a7c69b243d628f63b0…
Fenghua Yu (3):
x86/fpu: Refactor arch_set_user_pkey_access() for PKS support
x86/pks: Enable Protection Keys Supervisor (PKS)
x86/pks: Add PKS kernel API
Ira Weiny (6):
x86/pkeys: Create pkeys_common.h
x86/pks: Preserve the PKRS MSR on context switch
x86/entry: Pass irqentry_state_t by reference
x86/entry: Preserve PKRS MSR across exceptions
x86/fault: Report the PKRS state on fault
x86/pks: Add PKS test code
Documentation/core-api/protection-keys.rst | 102 ++-
arch/x86/Kconfig | 1 +
arch/x86/entry/common.c | 57 +-
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/idtentry.h | 29 +-
arch/x86/include/asm/msr-index.h | 1 +
arch/x86/include/asm/pgtable.h | 13 +-
arch/x86/include/asm/pgtable_types.h | 12 +
arch/x86/include/asm/pkeys.h | 15 +
arch/x86/include/asm/pkeys_common.h | 36 +
arch/x86/include/asm/processor.h | 13 +
arch/x86/include/uapi/asm/processor-flags.h | 2 +
arch/x86/kernel/cpu/common.c | 17 +
arch/x86/kernel/cpu/mce/core.c | 4 +
arch/x86/kernel/fpu/xstate.c | 22 +-
arch/x86/kernel/kvm.c | 4 +-
arch/x86/kernel/nmi.c | 7 +-
arch/x86/kernel/process.c | 21 +
arch/x86/kernel/traps.c | 21 +-
arch/x86/mm/fault.c | 86 ++-
arch/x86/mm/pkeys.c | 188 +++++-
include/linux/entry-common.h | 19 +-
include/linux/pgtable.h | 4 +
include/linux/pkeys.h | 23 +-
kernel/entry/common.c | 28 +-
lib/Kconfig.debug | 12 +
lib/Makefile | 3 +
lib/pks/Makefile | 3 +
lib/pks/pks_test.c | 690 ++++++++++++++++++++
mm/Kconfig | 2 +
tools/testing/selftests/x86/Makefile | 3 +-
tools/testing/selftests/x86/test_pks.c | 65 ++
32 files changed, 1376 insertions(+), 128 deletions(-)
create mode 100644 arch/x86/include/asm/pkeys_common.h
create mode 100644 lib/pks/Makefile
create mode 100644 lib/pks/pks_test.c
create mode 100644 tools/testing/selftests/x86/test_pks.c
--
2.28.0.rc0.12.gb6a658bd00c9
The kci_test_encap_fou() test from kci_test_encap() in rtnetlink.sh
needs the fou module to work. Otherwise it will fail with:
$ ip netns exec "$testns" ip fou add port 7777 ipproto 47
RTNETLINK answers: No such file or directory
Error talking to the kernel
Add CONFIG_NET_FOU to the config file as well; it needs to be set at
least as a loadable module.
Signed-off-by: Po-Hsu Lin <po-hsu.lin(a)canonical.com>
---
tools/testing/selftests/net/config | 1 +
tools/testing/selftests/net/rtnetlink.sh | 5 +++++
2 files changed, 6 insertions(+)
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 4364924..4d5df8e 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -33,3 +33,4 @@ CONFIG_KALLSYMS=y
CONFIG_TRACEPOINTS=y
CONFIG_NET_DROP_MONITOR=m
CONFIG_NETDEVSIM=m
+CONFIG_NET_FOU=m
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 8a2fe6d..c9ce3df 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -520,6 +520,11 @@ kci_test_encap_fou()
return $ksft_skip
fi
+ if ! /sbin/modprobe -q -n fou; then
+ echo "SKIP: module fou is not found"
+ return $ksft_skip
+ fi
+ /sbin/modprobe -q fou
ip -netns "$testns" fou add port 7777 ipproto 47 2>/dev/null
if [ $? -ne 0 ];then
echo "FAIL: can't add fou port 7777, skipping test"
--
2.7.4
The kci_test_encap_fou() test from kci_test_encap() in rtnetlink.sh
needs the fou module to work. Otherwise it will fail with:
$ ip netns exec "$testns" ip fou add port 7777 ipproto 47
RTNETLINK answers: No such file or directory
Error talking to the kernel
Add CONFIG_NET_FOU to the config file as well; it needs to be set at
least as a loadable module.
Signed-off-by: Po-Hsu Lin <po-hsu.lin(a)canonical.com>
---
tools/testing/selftests/net/config | 1 +
tools/testing/selftests/net/rtnetlink.sh | 5 +++++
2 files changed, 6 insertions(+)
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 3b42c06b..c5e50ab 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -31,3 +31,4 @@ CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_NETEM=y
CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_KALLSYMS=y
+CONFIG_NET_FOU=m
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 7c38a90..6f8f159 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -520,6 +520,11 @@ kci_test_encap_fou()
return $ksft_skip
fi
+ if ! /sbin/modprobe -q -n fou; then
+ echo "SKIP: module fou is not found"
+ return $ksft_skip
+ fi
+ /sbin/modprobe -q fou
ip -netns "$testns" fou add port 7777 ipproto 47 2>/dev/null
if [ $? -ne 0 ];then
echo "FAIL: can't add fou port 7777, skipping test"
--
2.7.4
Hi Linus,
Please pull the following Kunit next update for Linux 5.10-rc1.
This Kunit update for Linux 5.10-rc1 consists of:
- add Kunit to kernel_init() and remove KUnit from init calls entirely.
This addresses the concern Kunit would not work correctly during
late init phase.
- add a linker section where KUnit can put references to its test
suites.
This patch is the first step in transitioning to dispatching all KUnit
tests from a centralized executor rather than having each as its own
separate late_initcall.
- add a centralized executor to dispatch tests rather than relying on
late_initcall to schedule each test suite separately. Centralized
execution is for built-in tests only; modules will execute tests when
loaded.
- convert bitfield test to use KUnit framework
- Documentation updates for naming guidelines and how kunit_test_suite()
works.
- add test plan to KUnit TAP format
diff is attached.
Please note that there is a conflict in lib/kunit/test.c
between commit:
45dcbb6f5ef7 ("kunit: test: add test plan to KUnit TAP format")
from the kunit-next tree and commit:
e685acc91080 ("KUnit: KASAN Integration")
from the akpm-current tree. (now in master)
Stephen fixed this up in linux-next. Please let me know if you run
into any problems.
thanks,
-- Shuah
----------------------------------------------------------------
The following changes since commit 9123e3a74ec7b934a4a099e98af6a61c2f80bbf5:
Linux 5.9-rc1 (2020-08-16 13:04:57 -0700)
are available in the Git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest
tags/linux-kselftest-kunit-5.10-rc1
for you to fetch changes up to 294a7f1613ee49a608361bd319519561c0ca7e72:
lib: kunit: Fix compilation test when using TEST_BIT_FIELD_COMPILE
(2020-10-16 13:25:14 -0600)
----------------------------------------------------------------
linux-kselftest-kunit-5.10-rc1
This Kunit update for Linux 5.10-rc1 consists of:
- add Kunit to kernel_init() and remove KUnit from init calls entirely.
This addresses the concern Kunit would not work correctly during
late init phase.
- add a linker section where KUnit can put references to its test suites.
This patch is the first step in transitioning to dispatching all KUnit
tests from a centralized executor rather than having each as its own
separate late_initcall.
- add a centralized executor to dispatch tests rather than relying on
late_initcall to schedule each test suite separately. Centralized
execution is for built-in tests only; modules will execute tests when
loaded.
- convert bitfield test to use KUnit framework
- Documentation updates for naming guidelines and how kunit_test_suite()
works.
- add test plan to KUnit TAP format
----------------------------------------------------------------
Alan Maguire (1):
kunit: test: create a single centralized executor for all tests
Brendan Higgins (4):
vmlinux.lds.h: add linker section for KUnit test suites
init: main: add KUnit to kernel init
kunit: test: add test plan to KUnit TAP format
Documentation: kunit: add a brief blurb about kunit_test_suite
David Gow (1):
Documentation: kunit: Add naming guidelines
Vitor Massaru Iha (2):
lib: kunit: add bitfield test conversion to KUnit
lib: kunit: Fix compilation test when using TEST_BIT_FIELD_COMPILE
Documentation/dev-tools/kunit/index.rst | 1 +
Documentation/dev-tools/kunit/style.rst | 205
+++++++++++++++++++++
Documentation/dev-tools/kunit/usage.rst | 5 +
include/asm-generic/vmlinux.lds.h | 10 +-
include/kunit/test.h | 76 +++++---
init/main.c | 4 +
lib/Kconfig.debug | 23 ++-
lib/Makefile | 2 +-
lib/{test_bitfield.c => bitfield_kunit.c} | 90 ++++-----
lib/kunit/Makefile | 3 +-
lib/kunit/executor.c | 43 +++++
lib/kunit/test.c | 13 +-
tools/testing/kunit/kunit_parser.py | 76 ++++++--
.../test_data/test_is_test_passed-all_passed.log | Bin 1562 -> 1567
bytes
.../kunit/test_data/test_is_test_passed-crash.log | Bin 3016 -> 3021
bytes
.../test_data/test_is_test_passed-failure.log | Bin 1700 -> 1705
bytes
16 files changed, 441 insertions(+), 110 deletions(-)
create mode 100644 Documentation/dev-tools/kunit/style.rst
rename lib/{test_bitfield.c => bitfield_kunit.c} (67%)
create mode 100644 lib/kunit/executor.c
----------------------------------------------------------------
From: Yonghong Song <yhs(a)fb.com>
[ Upstream commit 7fb5eefd76394cfefb380724a87ca40b47d44405 ]
Andrii reported that with the latest clang, when building selftests, we get
errors like:
error: progs/test_sysctl_loop1.c:23:16: in function sysctl_tcp_mem i32 (%struct.bpf_sysctl*):
Looks like the BPF stack limit of 512 bytes is exceeded.
Please move large on stack variables into BPF per-cpu array map.
The error is triggered by the following LLVM patch:
https://reviews.llvm.org/D87134
For example, the following code is from test_sysctl_loop1.c:
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
...
}
Without the above LLVM patch, the compiler did optimization to load the string
(59 bytes long) with 7 64bit loads, 1 8bit load and 1 16bit load,
occupying 64 byte stack size.
With the above LLVM patch, the compiler only uses 8bit loads, but subregister is 32bit.
So stack requirements become 4 * 59 = 236 bytes. Together with other stuff on
the stack, total stack size exceeds 512 bytes, hence compiler complains and quits.
Removing the "volatile" keyword, or changing "volatile" to
"const"/"static const", does not fix the issue: the string is then put in the
.rodata.str1.1 section, which libbpf does not process, so it errors out with
libbpf: elf: skipping unrecognized data section(6) .rodata.str1.1
libbpf: prog 'sysctl_tcp_mem': bad map relo against '.L__const.is_tcp_mem.tcp_mem_name'
in section '.rodata.str1.1'
Defining the string const as global variable can fix the issue as it puts the string constant
in '.rodata' section which is recognized by libbpf. In the future, when libbpf can process
'.rodata.str*.*' properly, the global definition can be changed back to local definition.
Defining tcp_mem_name as a global, however, triggered a verifier failure.
./test_progs -n 7/21
libbpf: load bpf program failed: Permission denied
libbpf: -- BEGIN DUMP LOG ---
libbpf:
invalid stack off=0 size=1
verification time 6975 usec
stack depth 160+64
processed 889 insns (limit 1000000) max_states_per_insn 4 total_states
14 peak_states 14 mark_read 10
libbpf: -- END LOG --
libbpf: failed to load program 'sysctl_tcp_mem'
libbpf: failed to load object 'test_sysctl_loop2.o'
test_bpf_verif_scale:FAIL:114
#7/21 test_sysctl_loop2.o:FAIL
This actually exposed a bpf program bug. In test_sysctl_loop{1,2}, we have code
like
const char tcp_mem_name[] = "<...long string...>";
...
char name[64];
...
for (i = 0; i < sizeof(tcp_mem_name); ++i)
if (name[i] != tcp_mem_name[i])
return 0;
In the above code, if sizeof(tcp_mem_name) > 64, name[i] access may be
out of bound. The sizeof(tcp_mem_name) is 59 for test_sysctl_loop1.c and
79 for test_sysctl_loop2.c.
Without promotion-to-global change, old compiler generates code where
the overflowed stack access is actually filled with valid value, so hiding
the bpf program bug. With promotion-to-global change, the code is different,
more specifically, the previous loading constants to stack is gone, and
"name" occupies stack[-64:0] and overflow access triggers a verifier error.
To fix the issue, adjust "name" buffer size properly.
Reported-by: Andrii Nakryiko <andriin(a)fb.com>
Signed-off-by: Yonghong Song <yhs(a)fb.com>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Acked-by: Andrii Nakryiko <andriin(a)fb.com>
Link: https://lore.kernel.org/bpf/20200909171542.3673449-1-yhs@fb.com
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
tools/testing/selftests/bpf/progs/test_sysctl_loop1.c | 4 ++--
tools/testing/selftests/bpf/progs/test_sysctl_loop2.c | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
index d22e438198cf7..9af8822ece477 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
@@ -18,11 +18,11 @@
#define MAX_ULONG_STR_LEN 7
#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
+const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
- volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
index cb201cbe11e77..55251046c9b73 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
@@ -18,11 +18,11 @@
#define MAX_ULONG_STR_LEN 7
#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
+const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx)
{
- volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
--
2.25.1
From: Yonghong Song <yhs(a)fb.com>
[ Upstream commit 7fb5eefd76394cfefb380724a87ca40b47d44405 ]
Andrii reported that with the latest clang, when building selftests, we get
errors like:
error: progs/test_sysctl_loop1.c:23:16: in function sysctl_tcp_mem i32 (%struct.bpf_sysctl*):
Looks like the BPF stack limit of 512 bytes is exceeded.
Please move large on stack variables into BPF per-cpu array map.
The error is triggered by the following LLVM patch:
https://reviews.llvm.org/D87134
For example, the following code is from test_sysctl_loop1.c:
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
...
}
Without the above LLVM patch, the compiler did optimization to load the string
(59 bytes long) with 7 64bit loads, 1 8bit load and 1 16bit load,
occupying 64 byte stack size.
With the above LLVM patch, the compiler only uses 8bit loads, but subregister is 32bit.
So stack requirements become 4 * 59 = 236 bytes. Together with other stuff on
the stack, total stack size exceeds 512 bytes, hence compiler complains and quits.
Removing the "volatile" keyword, or changing "volatile" to
"const"/"static const", does not fix the issue: the string is then put in the
.rodata.str1.1 section, which libbpf does not process, so it errors out with
libbpf: elf: skipping unrecognized data section(6) .rodata.str1.1
libbpf: prog 'sysctl_tcp_mem': bad map relo against '.L__const.is_tcp_mem.tcp_mem_name'
in section '.rodata.str1.1'
Defining the string const as global variable can fix the issue as it puts the string constant
in '.rodata' section which is recognized by libbpf. In the future, when libbpf can process
'.rodata.str*.*' properly, the global definition can be changed back to local definition.
Defining tcp_mem_name as a global, however, triggered a verifier failure.
./test_progs -n 7/21
libbpf: load bpf program failed: Permission denied
libbpf: -- BEGIN DUMP LOG ---
libbpf:
invalid stack off=0 size=1
verification time 6975 usec
stack depth 160+64
processed 889 insns (limit 1000000) max_states_per_insn 4 total_states
14 peak_states 14 mark_read 10
libbpf: -- END LOG --
libbpf: failed to load program 'sysctl_tcp_mem'
libbpf: failed to load object 'test_sysctl_loop2.o'
test_bpf_verif_scale:FAIL:114
#7/21 test_sysctl_loop2.o:FAIL
This actually exposed a bpf program bug. In test_sysctl_loop{1,2}, we have code
like
const char tcp_mem_name[] = "<...long string...>";
...
char name[64];
...
for (i = 0; i < sizeof(tcp_mem_name); ++i)
if (name[i] != tcp_mem_name[i])
return 0;
In the above code, if sizeof(tcp_mem_name) > 64, name[i] access may be
out of bound. The sizeof(tcp_mem_name) is 59 for test_sysctl_loop1.c and
79 for test_sysctl_loop2.c.
Without promotion-to-global change, old compiler generates code where
the overflowed stack access is actually filled with valid value, so hiding
the bpf program bug. With promotion-to-global change, the code is different,
more specifically, the previous loading constants to stack is gone, and
"name" occupies stack[-64:0] and overflow access triggers a verifier error.
To fix the issue, adjust "name" buffer size properly.
Reported-by: Andrii Nakryiko <andriin(a)fb.com>
Signed-off-by: Yonghong Song <yhs(a)fb.com>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Acked-by: Andrii Nakryiko <andriin(a)fb.com>
Link: https://lore.kernel.org/bpf/20200909171542.3673449-1-yhs@fb.com
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
tools/testing/selftests/bpf/progs/test_sysctl_loop1.c | 4 ++--
tools/testing/selftests/bpf/progs/test_sysctl_loop2.c | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
index 458b0d69133e4..553a282d816ab 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
@@ -18,11 +18,11 @@
#define MAX_ULONG_STR_LEN 7
#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
+const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
- volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
index b2e6f9b0894d8..2b64bc563a12e 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
@@ -18,11 +18,11 @@
#define MAX_ULONG_STR_LEN 7
#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
+const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx)
{
- volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
--
2.25.1
From: Yonghong Song <yhs(a)fb.com>
[ Upstream commit 7fb5eefd76394cfefb380724a87ca40b47d44405 ]
Andrii reported that with the latest clang, when building selftests, we get
errors like:
error: progs/test_sysctl_loop1.c:23:16: in function sysctl_tcp_mem i32 (%struct.bpf_sysctl*):
Looks like the BPF stack limit of 512 bytes is exceeded.
Please move large on stack variables into BPF per-cpu array map.
The error is triggered by the following LLVM patch:
https://reviews.llvm.org/D87134
For example, the following code is from test_sysctl_loop1.c:
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
...
}
Without the above LLVM patch, the compiler optimized the load of the string
(59 bytes long) into 7 64bit loads, 1 8bit load and 1 16bit load,
occupying 64 bytes of stack.
With the above LLVM patch, the compiler only uses 8bit loads, but the subregister is 32bit.
So the stack requirement becomes 4 * 59 = 236 bytes. Together with other stuff on
the stack, the total stack size exceeds 512 bytes, hence the compiler complains and quits.
To fix the issue, removing the "volatile" keyword or changing "volatile" to
"const"/"static const" does not work: the string is then put in the .rodata.str1.1
section, which libbpf does not process, so it errors out with
libbpf: elf: skipping unrecognized data section(6) .rodata.str1.1
libbpf: prog 'sysctl_tcp_mem': bad map relo against '.L__const.is_tcp_mem.tcp_mem_name'
in section '.rodata.str1.1'
Defining the string constant as a global variable fixes the issue, as it puts the string
in the '.rodata' section, which is recognized by libbpf. In the future, when libbpf can process
'.rodata.str*.*' properly, the global definition can be changed back to a local definition.
Defining tcp_mem_name as a global, however, triggered a verifier failure.
./test_progs -n 7/21
libbpf: load bpf program failed: Permission denied
libbpf: -- BEGIN DUMP LOG ---
libbpf:
invalid stack off=0 size=1
verification time 6975 usec
stack depth 160+64
processed 889 insns (limit 1000000) max_states_per_insn 4 total_states
14 peak_states 14 mark_read 10
libbpf: -- END LOG --
libbpf: failed to load program 'sysctl_tcp_mem'
libbpf: failed to load object 'test_sysctl_loop2.o'
test_bpf_verif_scale:FAIL:114
#7/21 test_sysctl_loop2.o:FAIL
This actually exposed a bpf program bug. In test_sysctl_loop{1,2}, we have code
like
const char tcp_mem_name[] = "<...long string...>";
...
char name[64];
...
for (i = 0; i < sizeof(tcp_mem_name); ++i)
if (name[i] != tcp_mem_name[i])
return 0;
In the above code, if sizeof(tcp_mem_name) > 64, the name[i] access may be
out of bounds. sizeof(tcp_mem_name) is 59 for test_sysctl_loop1.c and
79 for test_sysctl_loop2.c, so it is test_sysctl_loop2.c that actually overflows.
Without the promotion-to-global change, the old compiler generated code where
the overflowed stack access was actually filled with a valid value, thus hiding
the bpf program bug. With the promotion-to-global change, the code is different:
more specifically, the previous loading of constants onto the stack is gone,
"name" occupies stack[-64:0], and the overflowing access triggers a verifier error.
To fix the issue, size the "name" buffer as sizeof(tcp_mem_name).
Reported-by: Andrii Nakryiko <andriin(a)fb.com>
Signed-off-by: Yonghong Song <yhs(a)fb.com>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Acked-by: Andrii Nakryiko <andriin(a)fb.com>
Link: https://lore.kernel.org/bpf/20200909171542.3673449-1-yhs@fb.com
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
tools/testing/selftests/bpf/progs/test_sysctl_loop1.c | 4 ++--
tools/testing/selftests/bpf/progs/test_sysctl_loop2.c | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
index 458b0d69133e4..553a282d816ab 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
@@ -18,11 +18,11 @@
#define MAX_ULONG_STR_LEN 7
#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
+const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
- volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
index b2e6f9b0894d8..2b64bc563a12e 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
@@ -18,11 +18,11 @@
#define MAX_ULONG_STR_LEN 7
#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
+const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx)
{
- volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
--
2.25.1