KUnit does a few expensive things when enabled. This hasn't been a
problem because KUnit was only enabled on test kernels, but with a few
people enabling (but not _using_) KUnit on production systems, we need a
runtime way of handling this.
Provide a 'kunit_running' static key (defaulting to false), which allows
us to hide any KUnit code behind a static branch. This should reduce the
performance impact (on other code) of having KUnit enabled to a single
NOP when no tests are running.
Note that, while it looks unintuitive, tests always run entirely within
__kunit_test_suites_init(), so it's safe to decrement the static key at
the end of this function, rather than in __kunit_test_suites_exit(),
which is only there to clean up results in debugfs.
Signed-off-by: David Gow <davidgow(a)google.com>
---
This should be a no-op (other than a possible performance improvement)
functionality-wise, and lays the groundwork for a more optimised static
stub implementation.
---
include/kunit/test.h | 4 ++++
lib/kunit/test.c | 6 ++++++
2 files changed, 10 insertions(+)
diff --git a/include/kunit/test.h b/include/kunit/test.h
index b1ab6b32216d..450a778a039e 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -16,6 +16,7 @@
#include <linux/container_of.h>
#include <linux/err.h>
#include <linux/init.h>
+#include <linux/jump_label.h>
#include <linux/kconfig.h>
#include <linux/kref.h>
#include <linux/list.h>
@@ -27,6 +28,9 @@
#include <asm/rwonce.h>
+/* Static key: true if any KUnit tests are currently running */
+extern struct static_key_false kunit_running;
+
struct kunit;
/* Size of log associated with test. */
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index 90640a43cf62..314717b63080 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -20,6 +20,8 @@
#include "string-stream.h"
#include "try-catch-impl.h"
+DEFINE_STATIC_KEY_FALSE(kunit_running);
+
#if IS_BUILTIN(CONFIG_KUNIT)
/*
* Fail the current test and print an error message to the log.
@@ -612,10 +614,14 @@ int __kunit_test_suites_init(struct kunit_suite * const * const suites, int num_
return 0;
}
+ static_branch_inc(&kunit_running);
+
for (i = 0; i < num_suites; i++) {
kunit_init_suite(suites[i]);
kunit_run_tests(suites[i]);
}
+
+ static_branch_dec(&kunit_running);
return 0;
}
EXPORT_SYMBOL_GPL(__kunit_test_suites_init);
--
2.38.0.135.g90850a2211-goog
This series cleans up and fixes break_ksm(). In summary, we no longer
use fake write faults to break COW but instead FAULT_FLAG_UNSHARE. Further,
we move away from using follow_page() [that we can hopefully remove
completely at one point] and use new walk_page_range_vma() instead.
Fortunately, we can get rid of VM_FAULT_WRITE and FOLL_MIGRATION in common
code now.
Add a selftest to measure MADV_UNMERGEABLE performance. In my setup
(AMD Ryzen 9 3900X), running the KSM selftest to test unmerge performance
on 2 GiB (taskset 0x8 ./ksm_tests -D -s 2048), this results in a
performance degradation of ~8% -- 9% (old: ~5250 MiB/s, new: ~4800 MiB/s).
I don't think we particularly care for now, but it's good to be aware
of the implication.
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: "Matthew Wilcox (Oracle)" <willy(a)infradead.org>
Cc: Jason Gunthorpe <jgg(a)nvidia.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
David Hildenbrand (7):
selftests/vm: add test to measure MADV_UNMERGEABLE performance
mm/ksm: simplify break_ksm() to not rely on VM_FAULT_WRITE
mm: remove VM_FAULT_WRITE
mm/ksm: fix KSM COW breaking with userfaultfd-wp via
FAULT_FLAG_UNSHARE
mm/pagewalk: add walk_page_range_vma()
mm/ksm: convert break_ksm() to use walk_page_range_vma()
mm/gup: remove FOLL_MIGRATION
include/linux/mm.h | 1 -
include/linux/mm_types.h | 3 -
include/linux/pagewalk.h | 3 +
mm/gup.c | 55 ++-----------
mm/huge_memory.c | 2 +-
mm/ksm.c | 103 +++++++++++++++++++------
mm/memory.c | 9 +--
mm/pagewalk.c | 27 +++++++
tools/testing/selftests/vm/ksm_tests.c | 76 +++++++++++++++++-
9 files changed, 192 insertions(+), 87 deletions(-)
--
2.37.3
TEST(check_file_mmap) will fail when we run the test on tmpfs and report:
mincore_selftest.c:261:check_file_mmap:Expected ra_pages (0) > 0 (0)
mincore_selftest.c:262:check_file_mmap:No read-ahead pages found in memory
For some embaded system, maybe there is only tmpfs file system exist,
run the test will fail, or we install the test on a filesystem that
has no backend, also it will fail as unepected.
So add a checking of block dev for the filesystem at first.
Signed-off-by: Zhao Gongyi <zhaogongyi(a)huawei.com>
---
.../selftests/mincore/mincore_selftest.c | 53 +++++++++++++++++++
1 file changed, 53 insertions(+)
diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c
index 4c88238fc8f0..287351a599a2 100644
--- a/tools/testing/selftests/mincore/mincore_selftest.c
+++ b/tools/testing/selftests/mincore/mincore_selftest.c
@@ -14,6 +14,9 @@
#include <sys/mman.h>
#include <string.h>
#include <fcntl.h>
+#include <mntent.h>
+#include <sys/stat.h>
+#include <linux/fs.h>
#include "../kselftest.h"
#include "../kselftest_harness.h"
@@ -173,6 +176,43 @@ TEST(check_huge_pages)
munmap(addr, page_size);
}
+static struct mntent* find_mount_point(const char *name)
+{
+ struct stat s;
+ FILE *mtab_fp;
+ struct mntent *mountEntry;
+ dev_t devno_of_name;
+
+ if (stat(name, &s) != 0)
+ return NULL;
+
+ devno_of_name = s.st_dev;
+
+ mtab_fp = setmntent("/proc/mounts", "r");
+ if (!mtab_fp)
+ return NULL;
+
+ while ((mountEntry = getmntent(mtab_fp)) != NULL) {
+ if (strcmp(name, mountEntry->mnt_dir) == 0
+ || strcmp(name, mountEntry->mnt_fsname) == 0) {
+ break;
+ }
+
+ if (mountEntry->mnt_fsname[0] == '/'
+ && stat(mountEntry->mnt_fsname, &s) == 0
+ && s.st_rdev == devno_of_name) {
+ break;
+ }
+
+ if (stat(mountEntry->mnt_dir, &s) == 0
+ && s.st_dev == devno_of_name) {
+ break;
+ }
+ }
+ endmntent(mtab_fp);
+
+ return mountEntry;
+}
/*
* Test mincore() behavior on a file-backed page.
@@ -194,6 +234,19 @@ TEST(check_file_mmap)
int fd;
int i;
int ra_pages = 0;
+ struct stat s;
+ struct mntent *mount_entry;
+
+ mount_entry = find_mount_point(".");
+ ASSERT_NE(NULL, mount_entry) {
+ TH_LOG("Find mount point of '.' failed");
+ }
+
+ ASSERT_EQ(0, (stat(mount_entry->mnt_fsname, &s) != 0
+ || !S_ISBLK(s.st_mode))) {
+ TH_LOG("There is no a block dev on mount point, "
+ "test is not supported");
+ }
page_size = sysconf(_SC_PAGESIZE);
vec_size = FILE_SIZE / page_size;
--
2.17.1
When built at -Os, gcc-12 recognizes an strlen() pattern in nolibc_strlen()
and replaces it with a jump to strlen(), which is not defined as a symbol
and breaks compilation. Worse, when the function is called strlen(), the
function is simply replaced with a jump to itself, hence becomes an
infinite loop.
One way to avoid this is to always set -ffreestanding, but the calling
code doesn't know this and there's no way (either via attributes or
pragmas) to globally enable it from include files, effectively leaving
a painful situation for the caller.
It turns out that -fno-tree-loop-distribute-patterns disables replacement
of strlen-like loops with calls to strlen and that this option is accepted
in the optimize() function attribute. Thus at least it allows us to make
sure our local definition is not replaced with a self jump. The function
only needs to be renamed back to strlen() so that the symbol exists, which
implies that nolibc_strlen() which is used on variable strings has to be
declared as a macro that points back to it before the strlen() macro is
redifined.
It was verified to produce valid code with gcc 3.4 to 12.1 at different
optimization levels, and both with constant and variable strings.
Reported-by: kernel test robot <yujie.liu(a)intel.com>
Link: https://lore.kernel.org/r/202210081618.754a77db-yujie.liu@intel.com
Fixes: 66b6f755ad45 ("rcutorture: Import a copy of nolibc")
Fixes: 96980b833a21 ("tools/nolibc/string: do not use __builtin_strlen() at -O0")
Cc: "Paul E. McKenney" <paulmck(a)kernel.org>
Signed-off-by: Willy Tarreau <w(a)1wt.eu>
---
tools/include/nolibc/string.h | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h
index bef35bee9c44..5ef8778cd16f 100644
--- a/tools/include/nolibc/string.h
+++ b/tools/include/nolibc/string.h
@@ -125,10 +125,16 @@ char *strcpy(char *dst, const char *src)
}
/* this function is only used with arguments that are not constants or when
- * it's not known because optimizations are disabled.
+ * it's not known because optimizations are disabled. Note that gcc 12
+ * recognizes an strlen() pattern and replaces it with a jump to strlen(),
+ * thus itself, hence the optimize() attribute below that's meant to disable
+ * this confusing practice.
*/
+#if defined(__GNUC__) && (__GNUC__ >= 12)
+__attribute__((optimize("no-tree-loop-distribute-patterns")))
+#endif
static __attribute__((unused))
-size_t nolibc_strlen(const char *str)
+size_t strlen(const char *str)
{
size_t len;
@@ -140,13 +146,12 @@ size_t nolibc_strlen(const char *str)
* the two branches, then will rely on an external definition of strlen().
*/
#if defined(__OPTIMIZE__)
+#define nolibc_strlen(x) strlen(x)
#define strlen(str) ({ \
__builtin_constant_p((str)) ? \
__builtin_strlen((str)) : \
nolibc_strlen((str)); \
})
-#else
-#define strlen(str) nolibc_strlen((str))
#endif
static __attribute__((unused))
--
2.35.3