Subject: Introduction: I am a Linux and open source software enthusiast
Greetings from Singapore,
My name is Mr. Turritopsis Dohrnii Teo En Ming, 43 years old as of 25
September 2021. My country is Singapore. Presently I am an IT
Consultant with a System Integrator (SI)/computer firm in Singapore. I
am also a Linux and open source software and information technology
enthusiast.
You can read my autobiography on my redundant blogs. The title of my
autobiography is:
"Autobiography of Singaporean Targeted Individual Mr. Turritopsis
Dohrnii Teo En Ming (Very First Draft, Lots More to Add in Future)"
Links to my redundant blogs (Blogger and Wordpress) can be found in my
email signature below. These are my main blogs.
I have three other redundant blogs, namely:
https://teo-en-ming.tumblr.com/https://teo-en-ming.medium.com/https://teo-en-ming.livejournal.com/
Future/subsequent versions of my autobiography will be published on my
redundant blogs.
My Blog Books (in PDF format) are also available for download on my
redundant blogs.
I have also published many guides, howtos, tutorials, and information
technology articles on my redundant blogs.
Thank you very much.
-----BEGIN EMAIL SIGNATURE-----
The Gospel for all Targeted Individuals (TIs):
[The New York Times] Microwave Weapons Are Prime Suspect in Ills of
U.S. Embassy Workers
Link:
https://www.nytimes.com/2018/09/01/science/sonic-attack-cuba-microwave.html
********************************************************************************************
Singaporean Targeted Individual Mr. Turritopsis Dohrnii Teo En Ming's
Academic Qualifications as at 14 Feb 2019 and refugee seeking attempts
at the United Nations Refugee Agency Bangkok (21 Mar 2017), in Taiwan
(5 Aug 2019) and Australia (25 Dec 2019 to 9 Jan 2020):
[1] https://tdtemcerts.wordpress.com/
[2] https://tdtemcerts.blogspot.sg/
[3] https://www.scribd.com/user/270125049/Teo-En-Ming
-----END EMAIL SIGNATURE-----
Currently, the test decides whether or not to test certain features
(e.g., writeprotect support) essentially by examining command-line
arguments. For example, if we're testing anonymous memory, then we
should test writeprotect support as well (since it generally is
supported for anonymous).
This is broken, however. Take writeprotect support as an example: sure
it's supported for anon, but it also requires that we have
CONFIG_HAVE_ARCH_USERFAULTFD_WP. I.e., it is not supported at all on
aarch64. So, running the test on such an arch fails: it tries to test
writeprotect for anon, but since it isn't *actually* supported, it
fails.
So, instead of checking command-line arguments to the test, check the
features the way the UFFD API intends: when we open a new userfaultfd,
pass in the feature(s) this test case would like to try to exercise. The
kernel reports back a subset of those features which are actually
supported: check these returned flags to see if the features are
*actually* supported.
(For a couple of cases, where *registration* would fail [with -EINVAL]
even though UFFDIO_API reports the feature as supported, we have to
check test_type as well as the feature flag.)
In some cases, we check immediately after opening the userfaultfd, and
if the features are missing, we skip the entire test. In some other
cases, we can proceed with "most" of the test, only skipping a few
pieces.
This lets us remove the global test_uffdio_wp and test_uffdio_minor
variables entirely.
Signed-off-by: Axel Rasmussen <axelrasmussen(a)google.com>
---
tools/testing/selftests/vm/userfaultfd.c | 94 +++++++++++-------------
1 file changed, 43 insertions(+), 51 deletions(-)
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 10ab56c2484a..2366caf90435 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -79,10 +79,6 @@ static int test_type;
#define ALARM_INTERVAL_SECS 10
static volatile bool test_uffdio_copy_eexist = true;
static volatile bool test_uffdio_zeropage_eexist = true;
-/* Whether to test uffd write-protection */
-static bool test_uffdio_wp = false;
-/* Whether to test uffd minor faults */
-static bool test_uffdio_minor = false;
static bool map_shared;
static int shm_fd;
@@ -90,6 +86,7 @@ static int huge_fd;
static char *huge_fd_off0;
static unsigned long long *count_verify;
static int uffd = -1;
+static uint64_t uffd_features;
static int uffd_flags, finished, *pipefd;
static char *area_src, *area_src_alias, *area_dst, *area_dst_alias;
static char *zeropage;
@@ -345,7 +342,7 @@ static struct uffd_test_ops hugetlb_uffd_test_ops = {
static struct uffd_test_ops *uffd_test_ops;
-static void userfaultfd_open(uint64_t *features)
+static void userfaultfd_open(uint64_t features)
{
struct uffdio_api uffdio_api;
@@ -355,14 +352,20 @@ static void userfaultfd_open(uint64_t *features)
uffd_flags = fcntl(uffd, F_GETFD, NULL);
uffdio_api.api = UFFD_API;
- uffdio_api.features = *features;
+ uffdio_api.features = features;
if (ioctl(uffd, UFFDIO_API, &uffdio_api))
err("UFFDIO_API failed.\nPlease make sure to "
"run with either root or ptrace capability.");
if (uffdio_api.api != UFFD_API)
err("UFFDIO_API error: %" PRIu64, (uint64_t)uffdio_api.api);
- *features = uffdio_api.features;
+ uffd_features = uffdio_api.features;
+}
+
+static inline bool uffd_wp_supported(void)
+{
+ return test_type == TEST_ANON &&
+ (uffd_features & UFFD_FEATURE_PAGEFAULT_FLAG_WP);
}
static inline void munmap_area(void **area)
@@ -397,6 +400,7 @@ static void uffd_test_ctx_clear(void)
err("close uffd");
uffd = -1;
}
+ uffd_features = 0;
huge_fd_off0 = NULL;
munmap_area((void **)&area_src);
@@ -405,7 +409,7 @@ static void uffd_test_ctx_clear(void)
munmap_area((void **)&area_dst_alias);
}
-static void uffd_test_ctx_init_ext(uint64_t *features)
+static void uffd_test_ctx_init(uint64_t features)
{
unsigned long nr, cpu;
@@ -445,11 +449,6 @@ static void uffd_test_ctx_init_ext(uint64_t *features)
err("pipe");
}
-static inline void uffd_test_ctx_init(uint64_t features)
-{
- uffd_test_ctx_init_ext(&features);
-}
-
static int my_bcmp(char *str1, char *str2, size_t n)
{
unsigned long i;
@@ -587,7 +586,7 @@ static int __copy_page(int ufd, unsigned long offset, bool retry)
uffdio_copy.dst = (unsigned long) area_dst + offset;
uffdio_copy.src = (unsigned long) area_src + offset;
uffdio_copy.len = page_size;
- if (test_uffdio_wp)
+ if (uffd_wp_supported())
uffdio_copy.mode = UFFDIO_COPY_MODE_WP;
else
uffdio_copy.mode = 0;
@@ -778,7 +777,7 @@ static void *background_thread(void *arg)
* at least the first half of the pages mapped already which
* can be write-protected for testing
*/
- if (test_uffdio_wp)
+ if (uffd_wp_supported())
wp_range(uffd, (unsigned long)area_dst + start_nr * page_size,
nr_pages_per_cpu * page_size, true);
@@ -1062,12 +1061,12 @@ static int userfaultfd_zeropage_test(void)
printf("testing UFFDIO_ZEROPAGE: ");
fflush(stdout);
- uffd_test_ctx_init(0);
+ uffd_test_ctx_init(UFFD_FEATURE_PAGEFAULT_FLAG_WP);
uffdio_register.range.start = (unsigned long) area_dst;
uffdio_register.range.len = nr_pages * page_size;
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
- if (test_uffdio_wp)
+ if (uffd_wp_supported())
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
err("register failure");
@@ -1089,7 +1088,7 @@ static int userfaultfd_events_test(void)
struct uffdio_register uffdio_register;
unsigned long expected_ioctls;
pthread_t uffd_mon;
- int err, features;
+ int err;
pid_t pid;
char c;
struct uffd_stats stats = { 0 };
@@ -1097,16 +1096,15 @@ static int userfaultfd_events_test(void)
printf("testing events (fork, remap, remove): ");
fflush(stdout);
- features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
- UFFD_FEATURE_EVENT_REMOVE;
- uffd_test_ctx_init(features);
+ uffd_test_ctx_init(UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
+ UFFD_FEATURE_EVENT_REMOVE | UFFD_FEATURE_PAGEFAULT_FLAG_WP);
fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
uffdio_register.range.start = (unsigned long) area_dst;
uffdio_register.range.len = nr_pages * page_size;
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
- if (test_uffdio_wp)
+ if (uffd_wp_supported())
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
err("register failure");
@@ -1144,7 +1142,7 @@ static int userfaultfd_sig_test(void)
unsigned long expected_ioctls;
unsigned long userfaults;
pthread_t uffd_mon;
- int err, features;
+ int err;
pid_t pid;
char c;
struct uffd_stats stats = { 0 };
@@ -1152,15 +1150,15 @@ static int userfaultfd_sig_test(void)
printf("testing signal delivery: ");
fflush(stdout);
- features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS;
- uffd_test_ctx_init(features);
+ uffd_test_ctx_init(UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_SIGBUS |
+ UFFD_FEATURE_PAGEFAULT_FLAG_WP);
fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
uffdio_register.range.start = (unsigned long) area_dst;
uffdio_register.range.len = nr_pages * page_size;
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
- if (test_uffdio_wp)
+ if (uffd_wp_supported())
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
err("register failure");
@@ -1209,25 +1207,23 @@ static int userfaultfd_minor_test(void)
void *expected_page;
char c;
struct uffd_stats stats = { 0 };
- uint64_t req_features, features_out;
-
- if (!test_uffdio_minor)
- return 0;
+ uint64_t features;
printf("testing minor faults: ");
fflush(stdout);
- if (test_type == TEST_HUGETLB)
- req_features = UFFD_FEATURE_MINOR_HUGETLBFS;
+ if (test_type == TEST_HUGETLB && map_shared)
+ features = UFFD_FEATURE_MINOR_HUGETLBFS;
else if (test_type == TEST_SHMEM)
- req_features = UFFD_FEATURE_MINOR_SHMEM;
- else
- return 1;
+ features = UFFD_FEATURE_MINOR_SHMEM;
+ else {
+ printf("skipping test due to unsupported memory type\n");
+ return 0;
+ }
- features_out = req_features;
- uffd_test_ctx_init_ext(&features_out);
+ uffd_test_ctx_init(features);
/* If kernel reports required features aren't supported, skip test. */
- if ((features_out & req_features) != req_features) {
+ if ((uffd_features & features) != features) {
printf("skipping test due to lack of feature support\n");
fflush(stdout);
return 0;
@@ -1349,10 +1345,6 @@ static void userfaultfd_pagemap_test(unsigned int test_pgsize)
int pagemap_fd;
uint64_t value;
- /* Pagemap tests uffd-wp only */
- if (!test_uffdio_wp)
- return;
-
/* Not enough memory to test this page size */
if (test_pgsize > nr_pages * page_size)
return;
@@ -1361,7 +1353,12 @@ static void userfaultfd_pagemap_test(unsigned int test_pgsize)
/* Flush so it doesn't flush twice in parent/child later */
fflush(stdout);
- uffd_test_ctx_init(0);
+ uffd_test_ctx_init(UFFD_FEATURE_PAGEFAULT_FLAG_WP);
+ /* Pagemap tests uffd-wp only */
+ if (!uffd_wp_supported()) {
+ printf("skipping test due to lack of feature support\n");
+ return;
+ }
if (test_pgsize > page_size) {
/* This is a thp test */
@@ -1426,7 +1423,7 @@ static int userfaultfd_stress(void)
struct uffdio_register uffdio_register;
struct uffd_stats uffd_stats[nr_cpus];
- uffd_test_ctx_init(0);
+ uffd_test_ctx_init(UFFD_FEATURE_PAGEFAULT_FLAG_WP);
if (posix_memalign(&area, page_size, page_size))
err("out of memory");
@@ -1464,7 +1461,7 @@ static int userfaultfd_stress(void)
uffdio_register.range.start = (unsigned long) area_dst;
uffdio_register.range.len = nr_pages * page_size;
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
- if (test_uffdio_wp)
+ if (uffd_wp_supported())
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
err("register failure");
@@ -1513,7 +1510,7 @@ static int userfaultfd_stress(void)
return 1;
/* Clear all the write protections if there is any */
- if (test_uffdio_wp)
+ if (uffd_wp_supported())
wp_range(uffd, (unsigned long)area_dst,
nr_pages * page_size, false);
@@ -1595,8 +1592,6 @@ static void set_test_type(const char *type)
if (!strcmp(type, "anon")) {
test_type = TEST_ANON;
uffd_test_ops = &anon_uffd_test_ops;
- /* Only enable write-protect test for anonymous test */
- test_uffdio_wp = true;
} else if (!strcmp(type, "hugetlb")) {
test_type = TEST_HUGETLB;
uffd_test_ops = &hugetlb_uffd_test_ops;
@@ -1604,13 +1599,10 @@ static void set_test_type(const char *type)
map_shared = true;
test_type = TEST_HUGETLB;
uffd_test_ops = &hugetlb_uffd_test_ops;
- /* Minor faults require shared hugetlb; only enable here. */
- test_uffdio_minor = true;
} else if (!strcmp(type, "shmem")) {
map_shared = true;
test_type = TEST_SHMEM;
uffd_test_ops = &shmem_uffd_test_ops;
- test_uffdio_minor = true;
} else {
err("Unknown test type: %s", type);
}
--
2.33.0.464.g1972c5931b-goog
From: Colin Ian King <colin.king(a)canonical.com>
There is a spelling mistake in an error message. Fix it.
Signed-off-by: Colin Ian King <colin.king(a)canonical.com>
---
tools/testing/selftests/kvm/lib/sparsebit.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c
index a0d0c83d83de..50e0cf41a7dd 100644
--- a/tools/testing/selftests/kvm/lib/sparsebit.c
+++ b/tools/testing/selftests/kvm/lib/sparsebit.c
@@ -1866,7 +1866,7 @@ void sparsebit_validate_internal(struct sparsebit *s)
* of total bits set.
*/
if (s->num_set != total_bits_set) {
- fprintf(stderr, "Number of bits set missmatch,\n"
+ fprintf(stderr, "Number of bits set mismatch,\n"
" s->num_set: 0x%lx total_bits_set: 0x%lx",
s->num_set, total_bits_set);
--
2.32.0
The kvm_vm_free() statement here is currently dead code, since the loop
in front of it can only be left with the "goto done" that jumps right
after the kvm_vm_free(). Fix it by swapping the locations of the "done"
label and the kvm_vm_free().
Signed-off-by: Thomas Huth <thuth(a)redhat.com>
---
tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c | 3 +--
tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c | 2 +-
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
index f40fd097cb35..6f6fd189dda3 100644
--- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
@@ -109,8 +109,7 @@ int main(int argc, char *argv[])
}
}
- kvm_vm_free(vm);
-
done:
+ kvm_vm_free(vm);
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
index 7e33a350b053..e683d0ac3e45 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
@@ -161,7 +161,7 @@ int main(int argc, char *argv[])
}
}
- kvm_vm_free(vm);
done:
+ kvm_vm_free(vm);
return 0;
}
--
2.27.0
Patch 1 fixes a KVM+rseq bug where KVM's handling of TIF_NOTIFY_RESUME,
e.g. for task migration, clears the flag without informing rseq and leads
to stale data in userspace's rseq struct.
Patch 2 is a cleanup to try and make future bugs less likely. It's also
a baby step towards moving and renaming tracehook_notify_resume() since
it has nothing to do with tracing. It kills me to not do the move/rename
as part of this series, but having a dedicated series/discussion seems
more appropriate given the sheer number of architectures that call
tracehook_notify_resume() and the lack of an obvious home for the code.
Patch 3 is a fix/cleanup to stop overriding x86's unistd_{32,64}.h when
the include path (intentionally) omits tools' uapi headers. KVM's
selftests do exactly that so that they can pick up the uapi headers from
the installed kernel headers, and still use various tools/ headers that
mirror kernel code, e.g. linux/types.h. This allows the new test in
patch 4 to reference __NR_rseq without having to manually define it.
Patch 4 is a regression test for the KVM+rseq bug.
Patch 5 is a cleanup made possible by patch 3.
Sean Christopherson (5):
KVM: rseq: Update rseq when processing NOTIFY_RESUME on xfer to KVM
guest
entry: rseq: Call rseq_handle_notify_resume() in
tracehook_notify_resume()
tools: Move x86 syscall number fallbacks to .../uapi/
KVM: selftests: Add a test for KVM_RUN+rseq to detect task migration
bugs
KVM: selftests: Remove __NR_userfaultfd syscall fallback
arch/arm/kernel/signal.c | 1 -
arch/arm64/kernel/signal.c | 1 -
arch/csky/kernel/signal.c | 4 +-
arch/mips/kernel/signal.c | 4 +-
arch/powerpc/kernel/signal.c | 4 +-
arch/s390/kernel/signal.c | 1 -
include/linux/tracehook.h | 2 +
kernel/entry/common.c | 4 +-
kernel/rseq.c | 4 +-
.../x86/include/{ => uapi}/asm/unistd_32.h | 0
.../x86/include/{ => uapi}/asm/unistd_64.h | 3 -
tools/testing/selftests/kvm/.gitignore | 1 +
tools/testing/selftests/kvm/Makefile | 3 +
tools/testing/selftests/kvm/rseq_test.c | 131 ++++++++++++++++++
14 files changed, 143 insertions(+), 20 deletions(-)
rename tools/arch/x86/include/{ => uapi}/asm/unistd_32.h (100%)
rename tools/arch/x86/include/{ => uapi}/asm/unistd_64.h (83%)
create mode 100644 tools/testing/selftests/kvm/rseq_test.c
--
2.33.0.rc1.237.g0d66db33f3-goog
Synchronous Ethernet networks use a physical layer clock to syntonize
the frequency across different network elements.
Basic SyncE node defined in the ITU-T G.8264 consist of an Ethernet
Equipment Clock (EEC) and have the ability to recover synchronization
from the synchronization inputs - either traffic interfaces or external
frequency sources.
The EEC can synchronize its frequency (syntonize) to any of those sources.
It is also able to select synchronization source through priority tables
and synchronization status messaging. It also provides neccessary
filtering and holdover capabilities
This patch series introduces basic interface for reading the Ethernet
Equipment Clock (EEC) state on a SyncE capable device. This state gives
information about the source of the syntonization signal (ether my port,
or any external one) and the state of EEC. This interface is required\
to implement Synchronization Status Messaging on upper layers.
Next steps:
- add interface to enable source clocks and get information about them
- properly return the EEC_SRC_PORT flag depending on the port recovered
clock being enabled and locked
v2:
- removed whitespace changes
- fix issues reported by test robot
v3:
- Changed naming from SyncE to EEC
- Clarify cover letter and commit message for patch 1
v4:
- Removed sync_source and pin_idx info
- Changed one structure to attributes
- Added EEC_SRC_PORT flag to indicate that the EEC is synchronized
to the recovered clock of a port that returns the state
Maciej Machnikowski (2):
rtnetlink: Add new RTM_GETEECSTATE message to get SyncE status
ice: add support for reading SyncE DPLL state
drivers/net/ethernet/intel/ice/ice.h | 5 ++
.../net/ethernet/intel/ice/ice_adminq_cmd.h | 34 +++++++++
drivers/net/ethernet/intel/ice/ice_common.c | 62 ++++++++++++++++
drivers/net/ethernet/intel/ice/ice_common.h | 4 ++
drivers/net/ethernet/intel/ice/ice_devids.h | 3 +
drivers/net/ethernet/intel/ice/ice_main.c | 29 ++++++++
drivers/net/ethernet/intel/ice/ice_ptp.c | 35 +++++++++
drivers/net/ethernet/intel/ice/ice_ptp_hw.c | 44 ++++++++++++
drivers/net/ethernet/intel/ice/ice_ptp_hw.h | 22 ++++++
include/linux/netdevice.h | 6 ++
include/uapi/linux/if_link.h | 31 ++++++++
include/uapi/linux/rtnetlink.h | 3 +
net/core/rtnetlink.c | 71 +++++++++++++++++++
security/selinux/nlmsgtab.c | 3 +-
14 files changed, 351 insertions(+), 1 deletion(-)
--
2.26.3
During initialization of a signal testcase, features declared as required
are properly checked against the running system but no action is then taken
to effectively skip such a testcase.
Fix core signals test logic to abort initialization and report such a
testcase as skipped to the KSelfTest framework.
Fixes: f96bf4340316 ("kselftest: arm64: mangle_pstate_invalid_compat_toggle and common utils")
Signed-off-by: Cristian Marussi <cristian.marussi(a)arm.com>
---
As a consequence KSelfTest TAP results will now report this when a signal-SVE
testcase run on a system missing SVE:
\# selftests: arm64: fake_sigreturn_sve_change_vl
\# Registered handlers for all signals.
\# Detected MINSTKSIGSZ:4720
\# Required Features: [ SVE ] NOT supported
\# ==>> completed. SKIP.
\# # FAKE_SIGRETURN_SVE_CHANGE :: Attempt to change SVE VL
\# ok 7 selftests: arm64: fake_sigreturn_sve_change_vl # SKIP
---
tools/testing/selftests/arm64/signal/test_signals_utils.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
index 6836510a522f..22722abc9dfa 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -266,16 +266,19 @@ int test_init(struct tdescr *td)
td->feats_supported |= FEAT_SSBS;
if (getauxval(AT_HWCAP) & HWCAP_SVE)
td->feats_supported |= FEAT_SVE;
- if (feats_ok(td))
+ if (feats_ok(td)) {
fprintf(stderr,
"Required Features: [%s] supported\n",
feats_to_string(td->feats_required &
td->feats_supported));
- else
+ } else {
fprintf(stderr,
"Required Features: [%s] NOT supported\n",
feats_to_string(td->feats_required &
~td->feats_supported));
+ td->result = KSFT_SKIP;
+ return 0;
+ }
}
/* Perform test specific additional initialization */
--
2.17.1
Commit 6a499c9c42d0 ("kunit: tool: make --raw_output support only
showing kunit output") made --raw_output a string-typed argument.
Passing --raw_output=kunit would make it only show KUnit-related output
and not everything.
However, converting it to a string-typed argument had side effects.
These calls used to work:
$ kunit.py run --raw_output
$ kunit.py run --raw_output suite_filter
$ kunit.py run suite_filter --raw_output
But now the second is actually parsed as
$ kunit.py run --raw_output=suite_filter
So the order you add in --raw_output now matters and command lines that
used to work might not anymore.
Change --raw_output back to a boolean flag, but change its behavior to
match that of the former --raw_output=kunit.
The assumption is that this is what most people wanted to see anyways.
To get the old behavior, users can simply do:
$ kunit.py run >/dev/null; cat .kunit/test.log
They don't have any easy way of getting the --raw_output=kunit behavior.
Signed-off-by: Daniel Latypov <dlatypov(a)google.com>
---
Meta: this is an alternative to
https://lore.kernel.org/linux-kselftest/20210903161405.1861312-1-dlatypov@g…
I'd slightly prefer that approach, but if we're fine with giving up the
old --raw_output semantics entirely, this would be cleaner.
I'd also assume that most people would prefer the new semantics, but I'm
not sure of that.
---
Documentation/dev-tools/kunit/kunit-tool.rst | 7 -------
tools/testing/kunit/kunit.py | 12 +++---------
tools/testing/kunit/kunit_tool_test.py | 13 ++++++-------
3 files changed, 9 insertions(+), 23 deletions(-)
diff --git a/Documentation/dev-tools/kunit/kunit-tool.rst b/Documentation/dev-tools/kunit/kunit-tool.rst
index ae52e0f489f9..03404746f1f6 100644
--- a/Documentation/dev-tools/kunit/kunit-tool.rst
+++ b/Documentation/dev-tools/kunit/kunit-tool.rst
@@ -114,13 +114,6 @@ results in TAP format, you can pass the ``--raw_output`` argument.
./tools/testing/kunit/kunit.py run --raw_output
-The raw output from test runs may contain other, non-KUnit kernel log
-lines. You can see just KUnit output with ``--raw_output=kunit``:
-
-.. code-block:: bash
-
- ./tools/testing/kunit/kunit.py run --raw_output=kunit
-
If you have KUnit results in their raw TAP format, you can parse them and print
the human-readable summary with the ``parse`` command for kunit_tool. This
accepts a filename for an argument, or will read from standard input.
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 5a931456e718..3626a56472b5 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -115,13 +115,7 @@ def parse_tests(request: KunitParseRequest) -> KunitResult:
'Tests not Parsed.')
if request.raw_output:
- output: Iterable[str] = request.input_data
- if request.raw_output == 'all':
- pass
- elif request.raw_output == 'kunit':
- output = kunit_parser.extract_tap_lines(output)
- else:
- print(f'Unknown --raw_output option "{request.raw_output}"', file=sys.stderr)
+ output = kunit_parser.extract_tap_lines(request.input_data)
for line in output:
print(line.rstrip())
@@ -256,8 +250,8 @@ def add_exec_opts(parser) -> None:
def add_parse_opts(parser) -> None:
parser.add_argument('--raw_output', help='If set don\'t format output from kernel. '
- 'If set to --raw_output=kunit, filters to just KUnit output.',
- type=str, nargs='?', const='all', default=None)
+ 'It will only show output from KUnit.',
+ action='store_true')
parser.add_argument('--json',
nargs='?',
help='Stores test results in a JSON, and either '
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 619c4554cbff..55ed3dac31ee 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -399,14 +399,13 @@ class KUnitMainTest(unittest.TestCase):
self.assertNotEqual(call, mock.call(StrContains('Testing complete.')))
self.assertNotEqual(call, mock.call(StrContains(' 0 tests run')))
- def test_run_raw_output_kunit(self):
+ def test_run_raw_output_does_not_take_positional_args(self):
+ # --raw_output might eventually support an argument, but we don't want it
+ # to consume any positional arguments, only ones after an '='.
self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
- kunit.main(['run', '--raw_output=kunit'], self.linux_source_mock)
- self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
- self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
- for call in self.print_mock.call_args_list:
- self.assertNotEqual(call, mock.call(StrContains('Testing complete.')))
- self.assertNotEqual(call, mock.call(StrContains(' 0 tests run')))
+ kunit.main(['run', '--raw_output', 'filter_glob'], self.linux_source_mock)
+ self.linux_source_mock.run_kernel.assert_called_once_with(
+ args=None, build_dir='.kunit', filter_glob='filter_glob', timeout=300)
def test_exec_timeout(self):
timeout = 3453
base-commit: 316346243be6df12799c0b64b788e06bad97c30b
--
2.33.0.464.g1972c5931b-goog
Problem:
What does this do?
$ kunit.py run --json
Well, it runs all the tests and prints test results out as JSON.
And next is
$ kunit.py run my-test-suite --json
This runs just `my-test-suite` and prints results out as JSON.
But what about?
$ kunit.py run --json my-test-suite
This runs all the tests and stores the json results in a "my-test-suite"
file.
Why:
--json, and now --raw_output are actually string flags. They just have a
default value. --json in particular takes the name of an output file.
It was intended that you'd do
$ kunit.py run --json=my_output_file my-test-suite
if you ever wanted to specify the value.
Workaround:
It doesn't seem like there's a way to make
https://docs.python.org/3/library/argparse.html only accept arg values
after a '='.
I believe that `--json` should "just work" regardless of where it is.
So this patch automatically rewrites a bare `--json` to `--json=stdout`.
That makes the examples above work the same way.
Add a regression test that can catch this for --raw_output.
Fixes: 6a499c9c42d0 ("kunit: tool: make --raw_output support only showing kunit output")
Signed-off-by: Daniel Latypov <dlatypov(a)google.com>
---
tools/testing/kunit/kunit.py | 24 ++++++++++++++++++++++--
tools/testing/kunit/kunit_tool_test.py | 8 ++++++++
2 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 5a931456e718..95d62020e4f2 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -16,7 +16,7 @@ assert sys.version_info >= (3, 7), "Python version is too old"
from collections import namedtuple
from enum import Enum, auto
-from typing import Iterable
+from typing import Iterable, Sequence
import kunit_config
import kunit_json
@@ -186,6 +186,26 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree,
exec_result.elapsed_time))
return parse_result
+# Problem:
+# $ kunit.py run --json
+# works as one would expect and prints the parsed test results as JSON.
+# $ kunit.py run --json suite_name
+# would *not* pass suite_name as the filter_glob and print as json.
+# argparse will consider it to be another way of writing
+# $ kunit.py run --json=suite_name
+# i.e. it would run all tests, and dump the json to a `suite_name` file.
+# So we hackily automatically rewrite --json => --json=stdout
+pseudo_bool_flag_defaults = {
+ '--json': 'stdout',
+ '--raw_output': 'kunit',
+}
+def massage_argv(argv: Sequence[str]) -> Sequence[str]:
+ def massage_arg(arg: str) -> str:
+ if arg not in pseudo_bool_flag_defaults:
+ return arg
+ return f'{arg}={pseudo_bool_flag_defaults[arg]}'
+ return map(massage_arg, argv)
+
def add_common_opts(parser) -> None:
parser.add_argument('--build_dir',
help='As in the make command, it specifies the build '
@@ -303,7 +323,7 @@ def main(argv, linux=None):
help='Specifies the file to read results from.',
type=str, nargs='?', metavar='input_file')
- cli_args = parser.parse_args(argv)
+ cli_args = parser.parse_args(massage_argv(argv))
if get_kernel_root_path():
os.chdir(get_kernel_root_path())
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 619c4554cbff..1edcc8373b4e 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -408,6 +408,14 @@ class KUnitMainTest(unittest.TestCase):
self.assertNotEqual(call, mock.call(StrContains('Testing complete.')))
self.assertNotEqual(call, mock.call(StrContains(' 0 tests run')))
+ def test_run_raw_output_does_not_take_positional_args(self):
+ # --raw_output is a string flag, but we don't want it to consume
+ # any positional arguments, only ones after an '='
+ self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
+ kunit.main(['run', '--raw_output', 'filter_glob'], self.linux_source_mock)
+ self.linux_source_mock.run_kernel.assert_called_once_with(
+ args=None, build_dir='.kunit', filter_glob='filter_glob', timeout=300)
+
def test_exec_timeout(self):
timeout = 3453
kunit.main(['exec', '--timeout', str(timeout)], self.linux_source_mock)
base-commit: a9c9a6f741cdaa2fa9ba24a790db8d07295761e3
--
2.33.0.153.gba50c8fa24-goog
This test assumes that the declared kunit_suite object is the exact one
which is being executed, which KUnit will not guarantee [1].
Specifically, `suite->log` is not initialized until a suite object is
executed. So if KUnit makes a copy of the suite and runs that instead,
this test dereferences an invalid pointer and (hopefully) segfaults.
N.B. since we no longer assume this, we can no longer verify that
`suite->log` is *not* allocated during normal execution.
An alternative to this patch that would allow us to test that would
require exposing an API for the current test to get its current suite.
Exposing that for one internal kunit test seems like overkill, and
grants users more footguns (e.g. reusing a test case in multiple suites
and changing behavior based on the suite name, dynamically modifying the
setup/cleanup funcs, storing/reading stuff out of the suite->log, etc.).
[1] In a subsequent patch, KUnit will allow running subsets of test
cases within a suite by making a copy of the suite w/ the filtered test
list. But there are other reasons KUnit might execute a copy, e.g. if it
ever wants to support parallel execution of different suites, recovering
from errors and restarting suites
Signed-off-by: Daniel Latypov <dlatypov(a)google.com>
Reviewed-by: Brendan Higgins <brendanhiggins(a)google.com>
---
lib/kunit/kunit-test.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c
index d69efcbed624..555601d17f79 100644
--- a/lib/kunit/kunit-test.c
+++ b/lib/kunit/kunit-test.c
@@ -415,12 +415,15 @@ static struct kunit_suite kunit_log_test_suite = {
static void kunit_log_test(struct kunit *test)
{
- struct kunit_suite *suite = &kunit_log_test_suite;
+ struct kunit_suite suite;
+
+ suite.log = kunit_kzalloc(test, KUNIT_LOG_SIZE, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, suite.log);
kunit_log(KERN_INFO, test, "put this in log.");
kunit_log(KERN_INFO, test, "this too.");
- kunit_log(KERN_INFO, suite, "add to suite log.");
- kunit_log(KERN_INFO, suite, "along with this.");
+ kunit_log(KERN_INFO, &suite, "add to suite log.");
+ kunit_log(KERN_INFO, &suite, "along with this.");
#ifdef CONFIG_KUNIT_DEBUGFS
KUNIT_EXPECT_NOT_ERR_OR_NULL(test,
@@ -428,12 +431,11 @@ static void kunit_log_test(struct kunit *test)
KUNIT_EXPECT_NOT_ERR_OR_NULL(test,
strstr(test->log, "this too."));
KUNIT_EXPECT_NOT_ERR_OR_NULL(test,
- strstr(suite->log, "add to suite log."));
+ strstr(suite.log, "add to suite log."));
KUNIT_EXPECT_NOT_ERR_OR_NULL(test,
- strstr(suite->log, "along with this."));
+ strstr(suite.log, "along with this."));
#else
KUNIT_EXPECT_PTR_EQ(test, test->log, (char *)NULL);
- KUNIT_EXPECT_PTR_EQ(test, suite->log, (char *)NULL);
#endif
}
base-commit: 316346243be6df12799c0b64b788e06bad97c30b
--
2.33.0.309.g3052b89438-goog
On Tue, Sep 21, 2021 at 1:24 AM David Laight <David.Laight(a)aculab.com> wrote:
>
> From: Luis Chamberlain
> > Sent: 17 September 2021 20:47
> >
> > When sysfs attributes use a lock also used on module removal we can
> > race to deadlock. This happens when for instance a sysfs file on
> > a driver is used, then at the same time we have module removal call
> > trigger. The module removal call code holds a lock, and then the sysfs
> > file entry waits for the same lock. While holding the lock the module
> > removal tries to remove the sysfs entries, but these cannot be removed
> > yet as one is waiting for a lock. This won't complete as the lock is
> > already held. Likewise module removal cannot complete, and so we deadlock.
>
> Isn't the real problem the race between a sysfs file action and the
> removal of the sysfs node?
Nope, that is taken care of by kernfs.
> This isn't really related to module unload - except that may
> well remove some sysfs nodes.
Nope, the issue is a deadlock that can happen due to a shared lock on
module removal and a driver sysfs operation.
> This is the same problem as removing any other kind of driver callback.
> There are three basic solutions:
> 1) Use a global lock - not usually useful.
> 2) Have the remove call sleep until any callbacks are complete.
> 3) Have the remove just request removal and have a final
> callback (from a different context).
Kernfs already does a sort of combination of 1) and 2) but 1) is using
atomic reference counts.
> If the remove can sleep (as in 2) then there is a requirement
> on the driver code to not hold any locks across the 'remove'
> that can be acquired during the callbacks.
And this is the part that kernfs has no control over since the removal
and sysfs operation are implementation specific.
> Now, for sysfs, you probably only want to sleep the remove code
> while a read/write is in progress - not just because the node
> is open.
> That probably requires marking an open node 'invalid' and
> deferring delete to close.
This is already done by kernfs.
> None of this requires a reference count on the module.
You are missing the point to the other aspect of the try_module_get(),
it lets you also check if module exit has been entered. By using
try_module_get() you let the module exit trump proceeding with an
operation, therefore also preventing any potential use of a shared
lock on module exit and the driver specific sysfs operation.
Luis
Makefile uses TEST_PROGS instead of TEST_GEN_PROGS to define
executables. TEST_PROGS is for shell scripts that need to be
installed and run by the common lib.mk framework. The common
framework doesn't touch TEST_PROGS when it does build and clean.
As a result "make kselftest-clean" and "make clean" fail to remove
executables. Run and install work because the common framework runs
and installs TEST_PROGS. Build works because the Makefile defines
"all" rule which is unnecessary if TEST_GEN_PROGS is used.
Use TEST_GEN_PROGS so the common framework can handle build/run/
install/clean properly.
Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org>
---
tools/testing/selftests/net/af_unix/Makefile | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index cfc7f4f97fd1..df341648f818 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,5 +1,2 @@
-##TEST_GEN_FILES := test_unix_oob
-TEST_PROGS := test_unix_oob
+TEST_GEN_PROGS := test_unix_oob
include ../../lib.mk
-
-all: $(TEST_PROGS)
--
2.30.2
The ATTRIBUTE_GROUPS is typically used to avoid boiler plate
code which is used in many drivers. Embracing ATTRIBUTE_GROUPS was
long due on the zram driver, however a recent fix for sysfs allows
users of ATTRIBUTE_GROUPS to also associate a module to the group
attribute.
In zram's case this also means it allows us to fix a race which triggers
a deadlock on the zram driver. This deadlock happens when a sysfs attribute
use a lock also used on module removal. This happens when for instance a
sysfs file on a driver is used, then at the same time we have module
removal call trigger. The module removal call code holds a lock, and then
the sysfs file entry waits for the same lock. While holding the lock the
module removal tries to remove the sysfs entries, but these cannot be
removed yet as one is waiting for a lock. This won't complete as the lock
is already held. Likewise module removal cannot complete, and so we
deadlock.
Sysfs fixes this when the group attributes have a module associated to
it, sysfs will *try* to get a refcount to the module when a shared
lock is used, prior to mucking with a sysfs attribute. If this fails we
just give up right away.
This deadlock was first reported with the zram driver, a sketch of how
this can happen follows:
CPU A CPU B
whatever_store()
module_unload
mutex_lock(foo)
mutex_lock(foo)
del_gendisk(zram->disk);
device_del()
device_remove_groups()
In this situation whatever_store() is waiting for the mutex foo to
become unlocked, but that won't happen until module removal is complete.
But module removal won't complete until the sysfs file being poked
completes which is waiting for a lock already held.
This issue can be reproduced easily on the zram driver as follows:
Loop 1 on one terminal:
while true;
do modprobe zram;
modprobe -r zram;
done
Loop 2 on a second terminal:
while true; do
echo 1024 > /sys/block/zram0/disksize;
echo 1 > /sys/block/zram0/reset;
done
Without this patch we end up in a deadlock, and the following
stack trace is produced which hints to us what the issue was:
INFO: task bash:888 blocked for more than 120 seconds.
Tainted: G E 5.12.0-rc1-next-20210304+ #4
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:bash state:D stack: 0 pid: 888 ppid: 887 flags:<etc>
Call Trace:
__schedule+0x2e4/0x900
schedule+0x46/0xb0
schedule_preempt_disabled+0xa/0x10
__mutex_lock.constprop.0+0x2c3/0x490
? _kstrtoull+0x35/0xd0
reset_store+0x6c/0x160 [zram]
kernfs_fop_write_iter+0x124/0x1b0
new_sync_write+0x11c/0x1b0
vfs_write+0x1c2/0x260
ksys_write+0x5f/0xe0
do_syscall_64+0x33/0x80
entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x7f34f2c3df33
RSP: 002b:00007ffe751df6e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f34f2c3df33
RDX: 0000000000000002 RSI: 0000561ccb06ec10 RDI: 0000000000000001
RBP: 0000561ccb06ec10 R08: 000000000000000a R09: 0000000000000001
R10: 0000561ccb157590 R11: 0000000000000246 R12: 0000000000000002
R13: 00007f34f2d0e6a0 R14: 0000000000000002 R15: 00007f34f2d0e8a0
INFO: task modprobe:1104 can't die for more than 120 seconds.
task:modprobe state:D stack: 0 pid: 1104 ppid: 916 flags:<etc>
Call Trace:
__schedule+0x2e4/0x900
schedule+0x46/0xb0
__kernfs_remove.part.0+0x228/0x2b0
? finish_wait+0x80/0x80
kernfs_remove_by_name_ns+0x50/0x90
remove_files+0x2b/0x60
sysfs_remove_group+0x38/0x80
sysfs_remove_groups+0x29/0x40
device_remove_attrs+0x4a/0x80
device_del+0x183/0x3e0
? mutex_lock+0xe/0x30
del_gendisk+0x27a/0x2d0
zram_remove+0x8a/0xb0 [zram]
? hot_remove_store+0xf0/0xf0 [zram]
zram_remove_cb+0xd/0x10 [zram]
idr_for_each+0x5e/0xd0
destroy_devices+0x39/0x6f [zram]
__do_sys_delete_module+0x190/0x2a0
do_syscall_64+0x33/0x80
entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x7f32adf727d7
RSP: 002b:00007ffc08bb38a8 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
RAX: ffffffffffffffda RBX: 000055eea23cbb10 RCX: 00007f32adf727d7
RDX: 0000000000000000 RSI: 0000000000000800 RDI: 000055eea23cbb78
RBP: 000055eea23cbb10 R08: 0000000000000000 R09: 0000000000000000
R10: 00007f32adfe5ac0 R11: 0000000000000206 R12: 000055eea23cbb78
R13: 0000000000000000 R14: 0000000000000000 R15: 000055eea23cbc20
[0] https://lkml.kernel.org/r/20210401235925.GR4332@42.do-not-panic.com
Signed-off-by: Luis Chamberlain <mcgrof(a)kernel.org>
---
drivers/block/zram/zram_drv.c | 11 ++---------
1 file changed, 2 insertions(+), 9 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index b26abcb955cc..60a55ae8cd91 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1902,14 +1902,7 @@ static struct attribute *zram_disk_attrs[] = {
NULL,
};
-static const struct attribute_group zram_disk_attr_group = {
- .attrs = zram_disk_attrs,
-};
-
-static const struct attribute_group *zram_disk_attr_groups[] = {
- &zram_disk_attr_group,
- NULL,
-};
+ATTRIBUTE_GROUPS(zram_disk);
/*
* Allocate and initialize new zram device. the function returns
@@ -1981,7 +1974,7 @@ static int zram_add(void)
blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
- device_add_disk(NULL, zram->disk, zram_disk_attr_groups);
+ device_add_disk(NULL, zram->disk, zram_disk_groups);
strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
--
2.30.2
Provide a simple state machine to fix races with driver exit where we
remove the CPU multistate callbacks and re-initialization / creation of
new per CPU instances which should be managed by these callbacks.
The zram driver makes use of cpu hotplug multistate support, whereby it
associates a struct zcomp per CPU. Each struct zcomp represents a
compression algorithm in charge of managing compression streams per
CPU. Although a compiled zram driver only supports a fixed set of
compression algorithms, each zram device gets a struct zcomp allocated
per CPU. The "multi" in CPU hotplug multstate refers to these per
cpu struct zcomp instances. Each of these will have the CPU hotplug
callback called for it on CPU plug / unplug. The kernel's CPU hotplug
multistate keeps a linked list of these different structures so that
it will iterate over them on CPU transitions.
By default at driver initialization we will create just one zram device
(num_devices=1) and a zcomp structure then set for the now default
lzo-rle comrpession algorithm. At driver removal we first remove each
zram device, and so we destroy the associated struct zcomp per CPU. But
since we expose sysfs attributes to create new devices or reset /
initialize existing zram devices, we can easily end up re-initializing
a struct zcomp for a zram device before the exit routine of the module
removes the cpu hotplug callback. When this happens the kernel's CPU
hotplug will detect that at least one instance (struct zcomp for us)
exists. This can happen in the following situation:
CPU 1 CPU 2
disksize_store(...);
class_unregister(...);
idr_for_each(...);
zram_debugfs_destroy();
idr_destroy(...);
unregister_blkdev(...);
cpuhp_remove_multi_state(...);
The warning comes up on cpuhp_remove_multi_state() when it sees that the
state for CPUHP_ZCOMP_PREPARE does not have an empty instance linked list.
In this case, that a struct zcom still exists, the driver allowed its
creation per CPU even though we could have just freed them per CPU
though a call on another CPU, and we are then later trying to remove the
hotplug callback.
Fix all this by providing a zram initialization boolean
protected the shared in the driver zram_index_mutex, which we
can use to annotate when sysfs attributes are safe to use or
not -- once the driver is properly initialized. When the driver
is going down we also are sure to not let userspace muck with
attributes which may affect each per cpu struct zcomp.
This also fixes a series of possible memory leaks. The
crashes and memory leaks can easily be caused by issuing
the zram02.sh script from the LTP project [0] in a loop
in two separate windows:
cd testcases/kernel/device-drivers/zram
while true; do PATH=$PATH:$PWD:$PWD/../../../lib/ ./zram02.sh; done
You end up with a splat as follows:
kernel: zram: Removed device: zram0
kernel: zram: Added device: zram0
kernel: zram0: detected capacity change from 0 to 209715200
kernel: Adding 104857596k swap on /dev/zram0. <etc>
kernel: zram0: detected capacitky change from 209715200 to 0
kernel: zram0: detected capacity change from 0 to 209715200
kernel: ------------[ cut here ]------------
kernel: Error: Removing state 63 which has instances left.
kernel: WARNING: CPU: 7 PID: 70457 at \
kernel/cpu.c:2069 __cpuhp_remove_state_cpuslocked+0xf9/0x100
kernel: Modules linked in: zram(E-) zsmalloc(E) <etc>
kernel: CPU: 7 PID: 70457 Comm: rmmod Tainted: G \
E 5.12.0-rc1-next-20210304 #3
kernel: Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), \
BIOS 1.14.0-2 04/01/2014
kernel: RIP: 0010:__cpuhp_remove_state_cpuslocked+0xf9/0x100
kernel: Code: <etc>
kernel: RSP: 0018:ffffa800c139be98 EFLAGS: 00010282
kernel: RAX: 0000000000000000 RBX: ffffffff9083db58 RCX: ffff9609f7dd86d8
kernel: RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffff9609f7dd86d0
kernel: RBP: 0000000000000000i R08: 0000000000000000 R09: ffffa800c139bcb8
kernel: R10: ffffa800c139bcb0 R11: ffffffff908bea40 R12: 000000000000003f
kernel: R13: 00000000000009d8 R14: 0000000000000000 R15: 0000000000000000
kernel: FS: 00007f1b075a7540(0000) GS:ffff9609f7dc0000(0000) knlGS:<etc>
kernel: CS: 0010 DS: 0000 ES 0000 CR0: 0000000080050033
kernel: CR2: 00007f1b07610490 CR3: 00000001bd04e000 CR4: 0000000000350ee0
kernel: Call Trace:
kernel: __cpuhp_remove_state+0x2e/0x80
kernel: __do_sys_delete_module+0x190/0x2a0
kernel: do_syscall_64+0x33/0x80
kernel: entry_SYSCALL_64_after_hwframe+0x44/0xae
The "Error: Removing state 63 which has instances left" refers
to the zram per CPU struct zcomp instances left.
[0] https://github.com/linux-test-project/ltp.git
Acked-by: Minchan Kim <minchan(a)kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof(a)kernel.org>
---
drivers/block/zram/zram_drv.c | 63 ++++++++++++++++++++++++++++++-----
1 file changed, 55 insertions(+), 8 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index f61910c65f0f..b26abcb955cc 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -44,6 +44,8 @@ static DEFINE_MUTEX(zram_index_mutex);
static int zram_major;
static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;
+static bool zram_up;
+
/* Module params (documentation at end) */
static unsigned int num_devices = 1;
/*
@@ -1704,6 +1706,7 @@ static void zram_reset_device(struct zram *zram)
comp = zram->comp;
disksize = zram->disksize;
zram->disksize = 0;
+ zram->comp = NULL;
set_capacity_and_notify(zram->disk, 0);
part_stat_set_all(zram->disk->part0, 0);
@@ -1724,9 +1727,18 @@ static ssize_t disksize_store(struct device *dev,
struct zram *zram = dev_to_zram(dev);
int err;
+ mutex_lock(&zram_index_mutex);
+
+ if (!zram_up) {
+ err = -ENODEV;
+ goto out;
+ }
+
disksize = memparse(buf, NULL);
- if (!disksize)
- return -EINVAL;
+ if (!disksize) {
+ err = -EINVAL;
+ goto out;
+ }
down_write(&zram->init_lock);
if (init_done(zram)) {
@@ -1754,12 +1766,16 @@ static ssize_t disksize_store(struct device *dev,
set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
up_write(&zram->init_lock);
+ mutex_unlock(&zram_index_mutex);
+
return len;
out_free_meta:
zram_meta_free(zram, disksize);
out_unlock:
up_write(&zram->init_lock);
+out:
+ mutex_unlock(&zram_index_mutex);
return err;
}
@@ -1775,8 +1791,17 @@ static ssize_t reset_store(struct device *dev,
if (ret)
return ret;
- if (!do_reset)
- return -EINVAL;
+ mutex_lock(&zram_index_mutex);
+
+ if (!zram_up) {
+ len = -ENODEV;
+ goto out;
+ }
+
+ if (!do_reset) {
+ len = -EINVAL;
+ goto out;
+ }
zram = dev_to_zram(dev);
bdev = zram->disk->part0;
@@ -1785,7 +1810,8 @@ static ssize_t reset_store(struct device *dev,
/* Do not reset an active device or claimed device */
if (bdev->bd_openers || zram->claim) {
mutex_unlock(&bdev->bd_disk->open_mutex);
- return -EBUSY;
+ len = -EBUSY;
+ goto out;
}
/* From now on, anyone can't open /dev/zram[0-9] */
@@ -1800,6 +1826,8 @@ static ssize_t reset_store(struct device *dev,
zram->claim = false;
mutex_unlock(&bdev->bd_disk->open_mutex);
+out:
+ mutex_unlock(&zram_index_mutex);
return len;
}
@@ -2010,6 +2038,10 @@ static ssize_t hot_add_show(struct class *class,
int ret;
mutex_lock(&zram_index_mutex);
+ if (!zram_up) {
+ mutex_unlock(&zram_index_mutex);
+ return -ENODEV;
+ }
ret = zram_add();
mutex_unlock(&zram_index_mutex);
@@ -2037,6 +2069,11 @@ static ssize_t hot_remove_store(struct class *class,
mutex_lock(&zram_index_mutex);
+ if (!zram_up) {
+ ret = -ENODEV;
+ goto out;
+ }
+
zram = idr_find(&zram_index_idr, dev_id);
if (zram) {
ret = zram_remove(zram);
@@ -2046,6 +2083,7 @@ static ssize_t hot_remove_store(struct class *class,
ret = -ENODEV;
}
+out:
mutex_unlock(&zram_index_mutex);
return ret ? ret : count;
}
@@ -2072,12 +2110,15 @@ static int zram_remove_cb(int id, void *ptr, void *data)
static void destroy_devices(void)
{
+ mutex_lock(&zram_index_mutex);
+ zram_up = false;
class_unregister(&zram_control_class);
idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
zram_debugfs_destroy();
idr_destroy(&zram_index_idr);
unregister_blkdev(zram_major, "zram");
cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
+ mutex_unlock(&zram_index_mutex);
}
static int __init zram_init(void)
@@ -2105,15 +2146,21 @@ static int __init zram_init(void)
return -EBUSY;
}
+ mutex_lock(&zram_index_mutex);
+
while (num_devices != 0) {
- mutex_lock(&zram_index_mutex);
ret = zram_add();
- mutex_unlock(&zram_index_mutex);
- if (ret < 0)
+ if (ret < 0) {
+ mutex_unlock(&zram_index_mutex);
goto out_error;
+ }
num_devices--;
}
+ zram_up = true;
+
+ mutex_unlock(&zram_index_mutex);
+
return 0;
out_error:
--
2.30.2
Now that sysfs has the deadlock race fixed with module removal,
enable the deadlock tests module removal tests. They were left
disabled by default as otherwise you would deadlock your system
./tools/testing/selftests/sysfs/sysfs.sh -t 0027
Running test: sysfs_test_0027 - run #0
Test for possible rmmod deadlock while writing x ... ok
./tools/testing/selftests/sysfs/sysfs.sh -t 0028
Running test: sysfs_test_0028 - run #0
Test for possible rmmod deadlock using rtnl_lock while writing x ... ok
Signed-off-by: Luis Chamberlain <mcgrof(a)kernel.org>
---
tools/testing/selftests/sysfs/sysfs.sh | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/sysfs/sysfs.sh b/tools/testing/selftests/sysfs/sysfs.sh
index f928635d0e35..4047ac48e764 100755
--- a/tools/testing/selftests/sysfs/sysfs.sh
+++ b/tools/testing/selftests/sysfs/sysfs.sh
@@ -60,8 +60,8 @@ ALL_TESTS="$ALL_TESTS 0023:1:1:test_dev_y:block"
ALL_TESTS="$ALL_TESTS 0024:1:1:test_dev_x:block"
ALL_TESTS="$ALL_TESTS 0025:1:1:test_dev_y:block"
ALL_TESTS="$ALL_TESTS 0026:1:1:test_dev_y:block"
-ALL_TESTS="$ALL_TESTS 0027:1:0:test_dev_x:block" # deadlock test
-ALL_TESTS="$ALL_TESTS 0028:1:0:test_dev_x:block" # deadlock test with rntl_lock
+ALL_TESTS="$ALL_TESTS 0027:1:1:test_dev_x:block" # deadlock test
+ALL_TESTS="$ALL_TESTS 0028:1:1:test_dev_x:block" # deadlock test with rntl_lock
ALL_TESTS="$ALL_TESTS 0029:1:1:test_dev_x:block" # kernfs race removal of store
ALL_TESTS="$ALL_TESTS 0030:1:1:test_dev_x:block" # kernfs race removal before mutex
ALL_TESTS="$ALL_TESTS 0031:1:1:test_dev_x:block" # kernfs race removal after mutex
--
2.30.2
If one ends up expanding on this line checkpatch will complain that the
combination S_IRWXU|S_IRUGO|S_IXUGO should just be replaced with the
octal 0755. Do that.
This makes no functional changes.
Signed-off-by: Luis Chamberlain <mcgrof(a)kernel.org>
---
fs/sysfs/dir.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 59dffd5ca517..b6b6796e1616 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -56,8 +56,7 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
kobject_get_ownership(kobj, &uid, &gid);
- kn = kernfs_create_dir_ns(parent, kobject_name(kobj),
- S_IRWXU | S_IRUGO | S_IXUGO, uid, gid,
+ kn = kernfs_create_dir_ns(parent, kobject_name(kobj), 0755, uid, gid,
kobj, ns);
if (IS_ERR(kn)) {
if (PTR_ERR(kn) == -EEXIST)
--
2.30.2
If one ends up extending this line checkpatch will complain about the
use of S_IRWXUGO suggesting it is not preferred and that 0777
should be used instead. Take the tip from checkpatch and do that
change before we do our subsequent changes.
This makes no functional changes.
Signed-off-by: Luis Chamberlain <mcgrof(a)kernel.org>
---
fs/kernfs/symlink.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index c8f8e41b8411..19a6c71c6ff5 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -36,8 +36,7 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
gid = target->iattr->ia_gid;
}
- kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, uid, gid,
- KERNFS_LINK);
+ kn = kernfs_new_node(parent, name, S_IFLNK|0777, uid, gid, KERNFS_LINK);
if (!kn)
return ERR_PTR(-ENOMEM);
--
2.30.2
There is quite a bit of tribal knowledge around proper use of
try_module_get() and that it must be used only in a context which
can ensure the module won't be gone during the operation. Document
this little bit of tribal knowledge.
I'm extending this tribal knowledge with new developments which it
seems some folks do not yet believe to be true: we can be sure a
module will exist during the lifetime of a sysfs file operation.
For proof, refer to test_sysfs test #32:
./tools/testing/selftests/sysfs/sysfs.sh -t 0032
Without this being true, the write would fail or worse,
a crash would happen, in this test. It does not.
Signed-off-by: Luis Chamberlain <mcgrof(a)kernel.org>
---
include/linux/module.h | 34 ++++++++++++++++++++++++++++++++--
1 file changed, 32 insertions(+), 2 deletions(-)
diff --git a/include/linux/module.h b/include/linux/module.h
index c9f1200b2312..22eacd5e1e85 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -609,10 +609,40 @@ void symbol_put_addr(void *addr);
to handle the error case (which only happens with rmmod --wait). */
extern void __module_get(struct module *module);
-/* This is the Right Way to get a module: if it fails, it's being removed,
- * so pretend it's not there. */
+/**
+ * try_module_get() - yields to module removal and bumps refcnt otherwise
+ * @module: the module we should check for
+ *
+ * This can be used to try to bump the reference count of a module, so to
+ * prevent module removal. The reference count of a module is not allowed
+ * to be incremented if the module is already being removed.
+ *
+ * Care must be taken to ensure the module cannot be removed during the call to
+ * try_module_get(). This can be done by having another entity other than the
+ * module itself increment the module reference count, or through some other
+ * means which guarantees the module could not be removed during an operation.
+ * An example of this later case is using try_module_get() in a sysfs file
+ * which the module created. The sysfs store / read file operations are
+ * gauranteed to exist through the use of kernfs's active reference (see
+ * kernfs_active()). If a sysfs file operation is being run, the module which
+ * created it must still exist as the module is in charge of removing the same
+ * sysfs file being read. Also, a sysfs / kernfs file removal cannot happen
+ * unless the same file is not active.
+ *
+ * One of the real values to try_module_get() is the module_is_live() check
+ * which ensures this the caller of try_module_get() can yield to userspace
+ * module removal requests and fail whatever it was about to process.
+ */
extern bool try_module_get(struct module *module);
+/**
+ * module_put() - release a reference count to a module
+ * @module: the module we should release a reference count for
+ *
+ * If you successfully bump a reference count to a module with try_module_get(),
+ * when you are finished you must call module_put() to release that reference
+ * count.
+ */
extern void module_put(struct module *module);
#else /*!CONFIG_MODULE_UNLOAD*/
--
2.30.2
This extends test_sysfs with support for using the failure injection
wait completion and knobs to force a few race conditions which
demonstrates that kernfs active reference protection is sufficient
for kobject / device protection at higher layers.
This adds 4 new tests which tries to remove the device attribute
store operation in 4 different situations:
1) at the start of kernfs_kernfs_fop_write_iter()
2) before the of->mutex is held in kernfs_kernfs_fop_write_iter()
3) after the of->mutex is held in kernfs_kernfs_fop_write_iter()
4) after the kernfs node active reference is taken
A write fails in call cases except the last one, test number #32. There
is a good explanation for this: *once* kernfs_get_active() gets called
we have a guarantee that the kernfs entry cannot be removed. If
kernfs_get_active() succeeds that entry cannot be removed and so
anything trying to remove that entry will have to wait. It is perhaps
not obvious but since a sysfs write will trigger eventually a
kernfs_get_active() call, and *only* if this succeeds will the sysfs
op be called, this and the fact that you cannot remove the kernfs
entry while the kenfs entry is active implies that a module that
created the respective sysfs / kernfs entry *cannot* possibly be
removed during a sysfs operation. And test number 32 provides us with
proof of this. If it were not true test #32 should crash.
No null dereferences are reproduced, even though this has been observed
in some complex testing cases [0]. If this issue really exists we should
have enough tools on the sysfs_test toolbox now to try to reproduce
this easily without having to poke around other drivers. It very likley
was the case that the issue reported [0] was possibly a side issue after
the first bug which was zram specific. This is why it is important to
isolate the issue and try to reproduce it in a generic form using the
test_sysfs driver.
[0] https://lkml.kernel.org/r/20210623215007.862787-1-mcgrof@kernel.org
Signed-off-by: Luis Chamberlain <mcgrof(a)kernel.org>
---
lib/Kconfig.debug | 3 +
lib/test_sysfs.c | 31 +++++
tools/testing/selftests/sysfs/config | 3 +
tools/testing/selftests/sysfs/sysfs.sh | 175 +++++++++++++++++++++++++
4 files changed, 212 insertions(+)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a29b7d398c4e..176b822654e5 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2358,6 +2358,9 @@ config TEST_SYSFS
depends on SYSFS
depends on NET
depends on BLOCK
+ select FAULT_INJECTION
+ select FAULT_INJECTION_DEBUG_FS
+ select FAIL_KERNFS_KNOBS
help
This builds the "test_sysfs" module. This driver enables to test the
sysfs file system safely without affecting production knobs which
diff --git a/lib/test_sysfs.c b/lib/test_sysfs.c
index 273fc3f39740..391e0af2864a 100644
--- a/lib/test_sysfs.c
+++ b/lib/test_sysfs.c
@@ -38,6 +38,11 @@
#include <linux/rtnetlink.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
+#include <linux/kernfs.h>
+
+#ifdef CONFIG_FAIL_KERNFS_KNOBS
+MODULE_IMPORT_NS(KERNFS_DEBUG_PRIVATE);
+#endif
static bool enable_lock;
module_param(enable_lock, bool_enable_only, 0644);
@@ -82,6 +87,13 @@ static bool enable_verbose_rmmod;
module_param(enable_verbose_rmmod, bool_enable_only, 0644);
MODULE_PARM_DESC(enable_verbose_rmmod, "enable verbose print messages on rmmod");
+#ifdef CONFIG_FAIL_KERNFS_KNOBS
+static bool enable_completion_on_rmmod;
+module_param(enable_completion_on_rmmod, bool_enable_only, 0644);
+MODULE_PARM_DESC(enable_completion_on_rmmod,
+ "enable sending a kernfs completion on rmmod");
+#endif
+
static int sysfs_test_major;
/**
@@ -289,6 +301,12 @@ static ssize_t config_show(struct device *dev,
"enable_verbose_writes:\t%s\n",
enable_verbose_writes ? "true" : "false");
+#ifdef CONFIG_FAIL_KERNFS_KNOBS
+ len += snprintf(buf+len, PAGE_SIZE - len,
+ "enable_completion_on_rmmod:\t%s\n",
+ enable_completion_on_rmmod ? "true" : "false");
+#endif
+
test_dev_config_unlock(test_dev);
return len;
@@ -926,10 +944,23 @@ static int __init test_sysfs_init(void)
}
module_init(test_sysfs_init);
+#ifdef CONFIG_FAIL_KERNFS_KNOBS
+/* The goal is to race our device removal with a pending kernfs -> store call */
+static void test_sysfs_kernfs_send_completion_rmmod(void)
+{
+ if (!enable_completion_on_rmmod)
+ return;
+ complete(&kernfs_debug_wait_completion);
+}
+#else
+static inline void test_sysfs_kernfs_send_completion_rmmod(void) {}
+#endif
+
static void __exit test_sysfs_exit(void)
{
if (enable_debugfs)
debugfs_remove(debugfs_dir);
+ test_sysfs_kernfs_send_completion_rmmod();
if (delay_rmmod_ms)
msleep(delay_rmmod_ms);
unregister_test_dev_sysfs(first_test_dev);
diff --git a/tools/testing/selftests/sysfs/config b/tools/testing/selftests/sysfs/config
index 9196f452ecd5..2876a229f95b 100644
--- a/tools/testing/selftests/sysfs/config
+++ b/tools/testing/selftests/sysfs/config
@@ -1,2 +1,5 @@
CONFIG_SYSFS=m
CONFIG_TEST_SYSFS=m
+CONFIG_FAULT_INJECTION=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FAIL_KERNFS_KNOBS=y
diff --git a/tools/testing/selftests/sysfs/sysfs.sh b/tools/testing/selftests/sysfs/sysfs.sh
index b3f4c2236c7f..f928635d0e35 100755
--- a/tools/testing/selftests/sysfs/sysfs.sh
+++ b/tools/testing/selftests/sysfs/sysfs.sh
@@ -62,6 +62,10 @@ ALL_TESTS="$ALL_TESTS 0025:1:1:test_dev_y:block"
ALL_TESTS="$ALL_TESTS 0026:1:1:test_dev_y:block"
ALL_TESTS="$ALL_TESTS 0027:1:0:test_dev_x:block" # deadlock test
ALL_TESTS="$ALL_TESTS 0028:1:0:test_dev_x:block" # deadlock test with rntl_lock
+ALL_TESTS="$ALL_TESTS 0029:1:1:test_dev_x:block" # kernfs race removal of store
+ALL_TESTS="$ALL_TESTS 0030:1:1:test_dev_x:block" # kernfs race removal before mutex
+ALL_TESTS="$ALL_TESTS 0031:1:1:test_dev_x:block" # kernfs race removal after mutex
+ALL_TESTS="$ALL_TESTS 0032:1:1:test_dev_x:block" # kernfs race removal after active
allow_user_defaults()
{
@@ -92,6 +96,9 @@ allow_user_defaults()
if [ -z $SYSFS_DEBUGFS_DIR ]; then
SYSFS_DEBUGFS_DIR="/sys/kernel/debug/test_sysfs"
fi
+ if [ -z $KERNFS_DEBUGFS_DIR ]; then
+ KERNFS_DEBUGFS_DIR="/sys/kernel/debug/kernfs"
+ fi
if [ -z $PAGE_SIZE ]; then
PAGE_SIZE=$(getconf PAGESIZE)
fi
@@ -167,6 +174,14 @@ modprobe_reset_enable_rtnl_lock_on_rmmod()
unset FIRST_MODPROBE_ARGS
}
+modprobe_reset_enable_completion()
+{
+ FIRST_MODPROBE_ARGS="enable_completion_on_rmmod=1 enable_verbose_writes=1"
+ FIRST_MODPROBE_ARGS="$FIRST_MODPROBE_ARGS enable_verbose_rmmod=1 delay_rmmod_ms=0"
+ modprobe_reset
+ unset FIRST_MODPROBE_ARGS
+}
+
load_req_mod()
{
modprobe_reset
@@ -197,6 +212,63 @@ debugfs_reset_first_test_dev_ignore_errors()
echo -n "1" >"$SYSFS_DEBUGFS_DIR"/reset_first_test_dev
}
+debugfs_kernfs_kernfs_fop_write_iter_exists()
+{
+ KNOB_DIR="${KERNFS_DEBUGFS_DIR}/config_fail_kernfs_fop_write_iter"
+ if [[ ! -d $KNOB_DIR ]]; then
+ echo "kernfs debugfs does not exist $KNOB_DIR"
+ return 0;
+ fi
+ KNOB_DEBUGFS="${KERNFS_DEBUGFS_DIR}/fail_kernfs_fop_write_iter"
+ if [[ ! -d $KNOB_DEBUGFS ]]; then
+ echo -n "kernfs debugfs for coniguring fail_kernfs_fop_write_iter "
+ echo "does not exist $KNOB_DIR"
+ return 0;
+ fi
+ return 1
+}
+
+debugfs_kernfs_kernfs_fop_write_iter_set_fail_once()
+{
+ KNOB_DEBUGFS="${KERNFS_DEBUGFS_DIR}/fail_kernfs_fop_write_iter"
+ echo 1 > $KNOB_DEBUGFS/interval
+ echo 100 > $KNOB_DEBUGFS/probability
+ echo 0 > $KNOB_DEBUGFS/space
+ # Disable verbose messages on the kernel ring buffer which may
+ # confuse developers with a kernel panic.
+ echo 0 > $KNOB_DEBUGFS/verbose
+
+ # Fail only once
+ echo 1 > $KNOB_DEBUGFS/times
+}
+
+debugfs_kernfs_kernfs_fop_write_iter_set_fail_never()
+{
+ KNOB_DEBUGFS="${KERNFS_DEBUGFS_DIR}/fail_kernfs_fop_write_iter"
+ echo 0 > $KNOB_DEBUGFS/times
+}
+
+debugfs_kernfs_set_wait_ms()
+{
+ SLEEP_AFTER_WAIT_MS="${KERNFS_DEBUGFS_DIR}/sleep_after_wait_ms"
+ echo $1 > $SLEEP_AFTER_WAIT_MS
+}
+
+debugfs_kernfs_disable_wait_kernfs_fop_write_iter()
+{
+ ENABLE_WAIT_KNOB="${KERNFS_DEBUGFS_DIR}/config_fail_kernfs_fop_write_iter/wait_"
+ for KNOB in ${ENABLE_WAIT_KNOB}*; do
+ echo 0 > $KNOB
+ done
+}
+
+debugfs_kernfs_enable_wait_kernfs_fop_write_iter()
+{
+ ENABLE_WAIT_KNOB="${KERNFS_DEBUGFS_DIR}/config_fail_kernfs_fop_write_iter/wait_$1"
+ echo -n "1" > $ENABLE_WAIT_KNOB
+ return $?
+}
+
set_orig()
{
if [[ ! -z $TARGET ]] && [[ ! -z $ORIG ]]; then
@@ -972,6 +1044,105 @@ sysfs_test_0028()
fi
}
+sysfs_race_kernfs_kernfs_fop_write_iter()
+{
+ TARGET="${DIR}/$(get_test_target $1)"
+ WAIT_AT=$2
+ EXPECT_WRITE_RETURNS=$3
+ MSDELAY=$4
+
+ modprobe_reset_enable_completion
+ ORIG=$(cat "${TARGET}")
+ TEST_STR=$(( $ORIG + 1 ))
+
+ echo -n "Test racing removal of sysfs store op with kernfs $WAIT_AT ... "
+
+ if debugfs_kernfs_kernfs_fop_write_iter_exists; then
+ echo -n "skipping test as CONFIG_FAIL_KERNFS_KNOBS "
+ echo " or CONFIG_FAULT_INJECTION_DEBUG_FS is disabled"
+ return $ksft_skip
+ fi
+
+ # Allow for failing the kernfs_kernfs_fop_write_iter call once,
+ # we'll provide exact context shortly afterwards.
+ debugfs_kernfs_kernfs_fop_write_iter_set_fail_once
+
+ # First disable all waits
+ debugfs_kernfs_disable_wait_kernfs_fop_write_iter
+
+ # Enable a wait_for_completion(&kernfs_debug_wait_completion) at the
+ # specified location inside the kernfs_fop_write_iter() routine
+ debugfs_kernfs_enable_wait_kernfs_fop_write_iter $WAIT_AT
+
+ # Configure kernfs so that after its wait_for_completion() it
+ # will msleep() this amount of time and schedule(). We figure this
+ # will be sufficient time to allow for our module removal to complete.
+ debugfs_kernfs_set_wait_ms $MSDELAY
+
+ # Now we trigger a kernfs write op, which will run kernfs_fop_write_iter,
+ # but will wait until our driver sends a respective completion
+ set_test_ignore_errors &
+ write_pid=$!
+
+ # At this point kernfs_fop_write_iter() hasn't run our op, its
+ # waiting for our completion at the specified time $WAIT_AT.
+ # We now remove our module which will send a
+ # complete(&kernfs_debug_wait_completion) right before we deregister
+ # our device and the sysfs device attributes are removed.
+ #
+ # After the completion is sent, the test_sysfs driver races with
+ # kernfs to do the device deregistration with the kernfs msleep
+ # and schedule(). This should mean we've forced trying to remove the
+ # module prior to allowing kernfs to run our store operation. If the
+ # race did happen we'll panic with a null dereference on the store op.
+ #
+ # If no race happens we should see no write operation triggered.
+ modprobe -r $TEST_DRIVER > /dev/null 2>&1
+
+ debugfs_kernfs_kernfs_fop_write_iter_set_fail_never
+
+ wait $write_pid
+ if [[ $? -eq $EXPECT_WRITE_RETURNS ]]; then
+ echo "ok"
+ else
+ echo "FAIL" >&2
+ fi
+}
+
+sysfs_test_0029()
+{
+ for delay in 0 2 4 8 16 32 64 128 246 512 1024; do
+ echo "Using delay-after-completion: $delay"
+ sysfs_race_kernfs_kernfs_fop_write_iter 0029 at_start 1 $delay
+ done
+}
+
+sysfs_test_0030()
+{
+ for delay in 0 2 4 8 16 32 64 128 246 512 1024; do
+ echo "Using delay-after-completion: $delay"
+ sysfs_race_kernfs_kernfs_fop_write_iter 0030 before_mutex 1 $delay
+ done
+}
+
+sysfs_test_0031()
+{
+ for delay in 0 2 4 8 16 32 64 128 246 512 1024; do
+ echo "Using delay-after-completion: $delay"
+ sysfs_race_kernfs_kernfs_fop_write_iter 0031 after_mutex 1 $delay
+ done
+}
+
+# A write only succeeds *iff* a module removal happens *after* the
+# kernfs active reference is obtained with kernfs_get_active().
+sysfs_test_0032()
+{
+ for delay in 0 2 4 8 16 32 64 128 246 512 1024; do
+ echo "Using delay-after-completion: $delay"
+ sysfs_race_kernfs_kernfs_fop_write_iter 0032 after_active 0 $delay
+ done
+}
+
test_gen_desc()
{
echo -n "$1 x $(get_test_count $1)"
@@ -1013,6 +1184,10 @@ list_tests()
echo "$(test_gen_desc 0026) - block test writing y larger delay and resetting device"
echo "$(test_gen_desc 0027) - test rmmod deadlock while writing x ... "
echo "$(test_gen_desc 0028) - test rmmod deadlock using rtnl_lock while writing x ..."
+ echo "$(test_gen_desc 0029) - racing removal of store op with kernfs at start"
+ echo "$(test_gen_desc 0030) - racing removal of store op with kernfs before mutex"
+ echo "$(test_gen_desc 0031) - racing removal of store op with kernfs after mutex"
+ echo "$(test_gen_desc 0032) - racing removal of store op with kernfs after active"
}
usage()
--
2.30.2
This adds initial failure injection support to kernfs. We start
off with debug knobs which when enabled allow test drivers, such as
test_sysfs, to then make use of these to try to force certain
difficult races to take place with a high degree of certainty.
This only adds runtime code *iff* the new bool CONFIG_FAIL_KERNFS_KNOBS is
enabled in your kernel. If you don't have this enabled this provides
no new functional. When CONFIG_FAIL_KERNFS_KNOBS is disabled the new
routine kernfs_debug_should_wait() ends up being transformed to if
(false), and so the compiler should optimize these out as dead code
producing no new effective binary changes.
We start off with enabling failure injections in kernfs by allowing us to
alter the way kernfs_fop_write_iter() behaves. We allow for the routine
kernfs_fop_write_iter() to wait for a certain condition in the kernel to
occur, after which it will sleep a predefined amount of time. This lets
kernfs users to time exactly when it want kernfs_fop_write_iter() to
complete, allowing for developing race conditions and test for correctness
in kernfs.
You'd boot with this enabled on your kernel command line:
fail_kernfs_fop_write_iter=1,100,0,1
The values are <interval,probability,size,times>, we don't care for
size, so for now we ignore it. The above ensures a failure will trigger
only once.
*How* we allow for this routine to change behaviour is left to knobs we
expose under debugfs:
# ls -1 /sys/kernel/debug/kernfs/config_fail_kernfs_fop_write_iter/
wait_after_active
wait_after_mutex
wait_at_start
wait_before_mutex
A debugfs entry also exists to allow us to sleep a configurabler amount
of time after the completion:
/sys/kernel/debug/kernfs/sleep_after_wait_ms
These two sets of knobs allow us to construct races and demonstrate
how the kernfs active reference should suffice to project against
races.
Enabling CONFIG_FAULT_INJECTION_DEBUG_FS enables us to configure the
differnt fault injection parametres for the new fail_kernfs_fop_write_iter
fault injection at run time:
ls -1 /sys/kernel/debug/kernfs/fail_kernfs_fop_write_iter/
interval
probability
space
task-filter
times
verbose
verbose_ratelimit_burst
verbose_ratelimit_interval_ms
Signed-off-by: Luis Chamberlain <mcgrof(a)kernel.org>
---
.../fault-injection/fault-injection.rst | 22 +++++
MAINTAINERS | 2 +-
fs/kernfs/Makefile | 1 +
fs/kernfs/failure-injection.c | 91 +++++++++++++++++++
fs/kernfs/file.c | 13 +++
fs/kernfs/kernfs-internal.h | 72 +++++++++++++++
include/linux/kernfs.h | 5 +
lib/Kconfig.debug | 10 ++
8 files changed, 215 insertions(+), 1 deletion(-)
create mode 100644 fs/kernfs/failure-injection.c
diff --git a/Documentation/fault-injection/fault-injection.rst b/Documentation/fault-injection/fault-injection.rst
index 4a25c5eb6f07..d4d34b082f47 100644
--- a/Documentation/fault-injection/fault-injection.rst
+++ b/Documentation/fault-injection/fault-injection.rst
@@ -28,6 +28,28 @@ Available fault injection capabilities
injects kernel RPC client and server failures.
+- fail_kernfs_fop_write_iter
+
+ Allows for failures to be enabled inside kernfs_fop_write_iter(). Enabling
+ this does not immediately enable any errors to occur. You must configure
+ how you want this routine to fail or change behaviour by using the debugfs
+ knobs for it:
+
+ # ls -1 /sys/kernel/debug/kernfs/config_fail_kernfs_fop_write_iter/
+ wait_after_active
+ wait_after_mutex
+ wait_at_start
+ wait_before_mutex
+
+ You can also configure how long to sleep after a wait under
+
+ /sys/kernel/debug/kernfs/sleep_after_wait_ms
+
+ If you enable CONFIG_FAULT_INJECTION_DEBUG_FS the fail_add_disk failure
+ injection parameters are placed under:
+
+ /sys/kernel/debug/kernfs/fail_kernfs_fop_write_iter/
+
- fail_make_request
injects disk IO errors on devices permitted by setting
diff --git a/MAINTAINERS b/MAINTAINERS
index 28a34384f541..acdbf91058d5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10341,7 +10341,7 @@ M: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
M: Tejun Heo <tj(a)kernel.org>
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
-F: fs/kernfs/
+F: fs/kernfs/*
F: include/linux/kernfs.h
KEXEC
diff --git a/fs/kernfs/Makefile b/fs/kernfs/Makefile
index 4ca54ff54c98..bc5b32ca39f9 100644
--- a/fs/kernfs/Makefile
+++ b/fs/kernfs/Makefile
@@ -4,3 +4,4 @@
#
obj-y := mount.o inode.o dir.o file.o symlink.o
+obj-$(CONFIG_FAIL_KERNFS_KNOBS) += failure-injection.o
diff --git a/fs/kernfs/failure-injection.c b/fs/kernfs/failure-injection.c
new file mode 100644
index 000000000000..4130d202c13b
--- /dev/null
+++ b/fs/kernfs/failure-injection.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/fault-inject.h>
+#include <linux/delay.h>
+
+#include "kernfs-internal.h"
+
+static DECLARE_FAULT_ATTR(fail_kernfs_fop_write_iter);
+struct kernfs_config_fail kernfs_config_fail;
+
+#define kernfs_config_fail(when) \
+ kernfs_config_fail.kernfs_fop_write_iter_fail.wait_ ## when
+
+#define kernfs_config_fail(when) \
+ kernfs_config_fail.kernfs_fop_write_iter_fail.wait_ ## when
+
+static int __init setup_fail_kernfs_fop_write_iter(char *str)
+{
+ return setup_fault_attr(&fail_kernfs_fop_write_iter, str);
+}
+
+__setup("fail_kernfs_fop_write_iter=", setup_fail_kernfs_fop_write_iter);
+
+struct dentry *kernfs_debugfs_root;
+struct dentry *config_fail_kernfs_fop_write_iter;
+
+static int __init kernfs_init_failure_injection(void)
+{
+ kernfs_config_fail.sleep_after_wait_ms = 100;
+ kernfs_debugfs_root = debugfs_create_dir("kernfs", NULL);
+
+ fault_create_debugfs_attr("fail_kernfs_fop_write_iter",
+ kernfs_debugfs_root, &fail_kernfs_fop_write_iter);
+
+ config_fail_kernfs_fop_write_iter =
+ debugfs_create_dir("config_fail_kernfs_fop_write_iter",
+ kernfs_debugfs_root);
+
+ debugfs_create_u32("sleep_after_wait_ms", 0600,
+ kernfs_debugfs_root,
+ &kernfs_config_fail.sleep_after_wait_ms);
+
+ debugfs_create_bool("wait_at_start", 0600,
+ config_fail_kernfs_fop_write_iter,
+ &kernfs_config_fail(at_start));
+ debugfs_create_bool("wait_before_mutex", 0600,
+ config_fail_kernfs_fop_write_iter,
+ &kernfs_config_fail(before_mutex));
+ debugfs_create_bool("wait_after_mutex", 0600,
+ config_fail_kernfs_fop_write_iter,
+ &kernfs_config_fail(after_mutex));
+ debugfs_create_bool("wait_after_active", 0600,
+ config_fail_kernfs_fop_write_iter,
+ &kernfs_config_fail(after_active));
+ return 0;
+}
+late_initcall(kernfs_init_failure_injection);
+
+int __kernfs_debug_should_wait_kernfs_fop_write_iter(bool evaluate)
+{
+ if (!evaluate)
+ return 0;
+
+ return should_fail(&fail_kernfs_fop_write_iter, 0);
+}
+
+DECLARE_COMPLETION(kernfs_debug_wait_completion);
+EXPORT_SYMBOL_NS_GPL(kernfs_debug_wait_completion, KERNFS_DEBUG_PRIVATE);
+
+void kernfs_debug_wait(void)
+{
+ unsigned long timeout;
+
+ timeout = wait_for_completion_timeout(&kernfs_debug_wait_completion,
+ msecs_to_jiffies(3000));
+ if (!timeout)
+ pr_info("%s waiting for kernfs_debug_wait_completion timed out\n",
+ __func__);
+ else
+ pr_info("%s received completion with time left on timeout %u ms\n",
+ __func__, jiffies_to_msecs(timeout));
+
+ /**
+ * The goal is wait for an event, and *then* once we have
+ * reached it, the other side will try to do something which
+ * it thinks will break. So we must give it some time to do
+ * that. The amount of time is configurable.
+ */
+ msleep(kernfs_config_fail.sleep_after_wait_ms);
+ pr_info("%s ended\n", __func__);
+}
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 60e2a86c535e..4479c6580333 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -259,6 +259,9 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
const struct kernfs_ops *ops;
char *buf;
+ if (kernfs_debug_should_wait(kernfs_fop_write_iter, at_start))
+ kernfs_debug_wait();
+
if (of->atomic_write_len) {
if (len > of->atomic_write_len)
return -E2BIG;
@@ -280,17 +283,27 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
}
buf[len] = '\0'; /* guarantee string termination */
+ if (kernfs_debug_should_wait(kernfs_fop_write_iter, before_mutex))
+ kernfs_debug_wait();
+
/*
* @of->mutex nests outside active ref and is used both to ensure that
* the ops aren't called concurrently for the same open file.
*/
mutex_lock(&of->mutex);
+
+ if (kernfs_debug_should_wait(kernfs_fop_write_iter, after_mutex))
+ kernfs_debug_wait();
+
if (!kernfs_get_active(of->kn)) {
mutex_unlock(&of->mutex);
len = -ENODEV;
goto out_free;
}
+ if (kernfs_debug_should_wait(kernfs_fop_write_iter, after_active))
+ kernfs_debug_wait();
+
ops = kernfs_ops(of->kn);
if (ops->write)
len = ops->write(of, buf, len, iocb->ki_pos);
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index f9cc912c31e1..9e3abf597e2d 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -18,6 +18,7 @@
#include <linux/kernfs.h>
#include <linux/fs_context.h>
+#include <linux/stringify.h>
struct kernfs_iattrs {
kuid_t ia_uid;
@@ -147,4 +148,75 @@ void kernfs_drain_open_files(struct kernfs_node *kn);
*/
extern const struct inode_operations kernfs_symlink_iops;
+/*
+ * failure-injection.c
+ */
+#ifdef CONFIG_FAIL_KERNFS_KNOBS
+
+/**
+ * struct kernfs_fop_write_iter_fail - how kernfs_fop_write_iter_fail fails
+ *
+ * This lets you configure what part of kernfs_fop_write_iter() should behave
+ * in a specific way to allow userspace to capture possible failures in
+ * kernfs. The wait knobs are allowed to let you design capture possible
+ * race conditions which would otherwise be difficult to reproduce. A
+ * secondary driver would tell kernfs's wait completion when it is done.
+ *
+ * The point to the wait completion failure injection tests are to confirm
+ * that the kernfs active refcount suffice to ensure other objects in other
+ * layers are also gauranteed to exist, even they are opaque to kernfs. This
+ * includes kobjects, devices, and other objects built on top of this, like
+ * the block layer when using sysfs block device attributes.
+ *
+ * @wait_at_start: waits for completion from a third party at the start of
+ * the routine.
+ * @wait_before_mutex: waits for completion from a third party before we
+ * are allowed to continue before the of->mutex is held.
+ * @wait_after_mutex: waits for completion from a third party after we
+ * have held the of->mutex.
+ * @wait_after_active: waits for completion from a thid party after we
+ * have refcounted the struct kernfs_node.
+ */
+struct kernfs_fop_write_iter_fail {
+ bool wait_at_start;
+ bool wait_before_mutex;
+ bool wait_after_mutex;
+ bool wait_after_active;
+};
+
+/**
+ * struct kernfs_config_fail - kernfs configuration for failure injection
+ *
+ * You can kernfs failure injection on boot, and in particular we currently
+ * only support failures for kernfs_fop_write_iter(). However, we don't
+ * want to always enable errors on this call when failure injection is enabled
+ * as this routine is used by many parts of the kernel for proper functionality.
+ * The compromise we make is we let userspace start enabling which parts it
+ * wants to fail after boot, if and only if failure injection has been enabled.
+ *
+ * @kernfs_fop_write_iter_fail: configuration for how we want to allow
+ * for failure injection on kernfs_fop_write_iter()
+ * @sleep_after_wait_ms: how many ms to wait after completion is received.
+ */
+struct kernfs_config_fail {
+ struct kernfs_fop_write_iter_fail kernfs_fop_write_iter_fail;
+ u32 sleep_after_wait_ms;
+};
+
+extern struct kernfs_config_fail kernfs_config_fail;
+
+#define __kernfs_config_wait_var(func, when) \
+ (kernfs_config_fail. func ## _fail.wait_ ## when)
+#define __kernfs_debug_should_wait_func_name(func) __kernfs_debug_should_wait_## func
+
+#define kernfs_debug_should_wait(func, when) \
+ __kernfs_debug_should_wait_func_name(func)(__kernfs_config_wait_var(func, when))
+int __kernfs_debug_should_wait_kernfs_fop_write_iter(bool evaluate);
+void kernfs_debug_wait(void);
+#else
+static inline void kernfs_init_failure_injection(void) {}
+#define kernfs_debug_should_wait(func, when) (false)
+static inline void kernfs_debug_wait(void) {}
+#endif
+
#endif /* __KERNFS_INTERNAL_H */
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 3ccce6f24548..cd968ee2b503 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -411,6 +411,11 @@ void kernfs_init(void);
struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
u64 id);
+
+#ifdef CONFIG_FAIL_KERNFS_KNOBS
+extern struct completion kernfs_debug_wait_completion;
+#endif
+
#else /* CONFIG_KERNFS */
static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ae19bf1a21b8..a29b7d398c4e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1902,6 +1902,16 @@ config FAULT_INJECTION_USERCOPY
Provides fault-injection capability to inject failures
in usercopy functions (copy_from_user(), get_user(), ...).
+config FAIL_KERNFS_KNOBS
+ bool "Fault-injection support in kernfs"
+ depends on FAULT_INJECTION
+ help
+ Provide fault-injection capability for kernfs. This only enables
+ the error injection functionality. To use it you must configure which
+ which path you want to trigger on error on using debugfs under
+ /sys/kernel/debug/kernfs/config_fail_kernfs_fop_write_iter/. By
+ default all of these are disabled.
+
config FAIL_MAKE_REQUEST
bool "Fault-injection capability for disk IO"
depends on FAULT_INJECTION && BLOCK
--
2.30.2
Two selftests drivers exist under the copyleft-next license.
These drivers were added prior to SPDX practice taking full swing
in the kernel. Now that we have an SPDX tag for copylef-next-0.3.1
documented, embrace it and remove the boiler plate.
Cc: Goldwyn Rodrigues <rgoldwyn(a)suse.com>
Cc: Kuno Woudt <kuno(a)frob.nl>
Cc: Richard Fontana <fontana(a)sharpeleven.org>
Cc: copyleft-next(a)lists.fedorahosted.org
Cc: Ciaran Farrell <Ciaran.Farrell(a)suse.com>
Cc: Christopher De Nicolo <Christopher.DeNicolo(a)suse.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Thorsten Leemhuis <linux(a)leemhuis.info>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Luis Chamberlain <mcgrof(a)kernel.org>
---
lib/test_kmod.c | 12 +-----------
lib/test_sysctl.c | 12 +-----------
tools/testing/selftests/kmod/kmod.sh | 13 +------------
tools/testing/selftests/sysctl/sysctl.sh | 12 +-----------
4 files changed, 4 insertions(+), 45 deletions(-)
diff --git a/lib/test_kmod.c b/lib/test_kmod.c
index ce1589391413..d62afd89dc63 100644
--- a/lib/test_kmod.c
+++ b/lib/test_kmod.c
@@ -1,18 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1
/*
* kmod stress test driver
*
* Copyright (C) 2017 Luis R. Rodriguez <mcgrof(a)kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or at your option any
- * later version; or, when distributed separately from the Linux kernel or
- * when incorporated into other software packages, subject to the following
- * license:
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of copyleft-next (version 0.3.1 or later) as published
- * at http://copyleft-next.org/.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c
index 3750323973f4..9e5bd10a930a 100644
--- a/lib/test_sysctl.c
+++ b/lib/test_sysctl.c
@@ -1,18 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1
/*
* proc sysctl test driver
*
* Copyright (C) 2017 Luis R. Rodriguez <mcgrof(a)kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or at your option any
- * later version; or, when distributed separately from the Linux kernel or
- * when incorporated into other software packages, subject to the following
- * license:
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of copyleft-next (version 0.3.1 or later) as published
- * at http://copyleft-next.org/.
*/
/*
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
index afd42387e8b2..7189715d7960 100755
--- a/tools/testing/selftests/kmod/kmod.sh
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -1,18 +1,7 @@
#!/bin/bash
-#
+# SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1
# Copyright (C) 2017 Luis R. Rodriguez <mcgrof(a)kernel.org>
#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by the Free
-# Software Foundation; either version 2 of the License, or at your option any
-# later version; or, when distributed separately from the Linux kernel or
-# when incorporated into other software packages, subject to the following
-# license:
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of copyleft-next (version 0.3.1 or later) as published
-# at http://copyleft-next.org/.
-
# This is a stress test script for kmod, the kernel module loader. It uses
# test_kmod which exposes a series of knobs for the API for us so we can
# tweak each test in userspace rather than in kernelspace.
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index 19515dcb7d04..2046c603a4d4 100755
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -1,16 +1,6 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1
# Copyright (C) 2017 Luis R. Rodriguez <mcgrof(a)kernel.org>
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by the Free
-# Software Foundation; either version 2 of the License, or at your option any
-# later version; or, when distributed separately from the Linux kernel or
-# when incorporated into other software packages, subject to the following
-# license:
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of copyleft-next (version 0.3.1 or later) as published
-# at http://copyleft-next.org/.
# This performs a series tests against the proc sysctl interface.
--
2.30.2
Add the full text of the copyleft-next-0.3.1 license to the kernel
tree as well as the required tags for reference and tooling.
The license text was copied directly from the copyleft-next project's
git tree [0].
Discussion of using copyleft-next-0.3.1 on Linux started since June,
2016 [1]. In the end Linus' preference was to have drivers use
MODULE_LICENSE("GPL") to make it clear that the GPL applies when it
comes to Linux [2]. Additionally, even though copyleft-next-0.3.1 has
been found to be to be GPLv2 compatible by three attorneys at SUSE and
Redhat [3], to err on the side of caution we simply recommend to
always use the "OR" language for this license [4].
Even though it has been a goal of the project to be GPL-v2 compatible
to be certain in 2016 I asked for a clarification about what makes
copyleft-next GPLv2 compatible and also asked for a summary of
benefits. This prompted some small minor changes to make compatibility
even further clear and as of copyleft 0.3.1 compatibility should
be crystal clear [5].
The summary of why copyleft-next 0.3.1 is compatible with GPLv2
is explained as follows:
Like GPLv2, copyleft-next requires distribution of derivative works
("Derived Works" in copyleft-next 0.3.x) to be under the same license.
Ordinarily this would make the two licenses incompatible. However,
copyleft-next 0.3.1 says: "If the Derived Work includes material
licensed under the GPL, You may instead license the Derived Work under
the GPL." "GPL" is defined to include GPLv2.
In practice this means copyleft-next code in Linux may be licensed
under the GPL2, however there are additional obvious gains for
bringing contributions from Linux outbound where copyleft-next is
preferred. A summary of benefits why projects outside of Linux might
prefer to use copyleft-next >= 0.3.1 over GPLv2:
o It is much shorter and simpler
o It has an explicit patent license grant, unlike GPLv2
o Its notice preservation conditions are clearer
o More free software/open source licenses are compatible
with it (via section 4)
o The source code requirement triggered by binary distribution
is much simpler in a procedural sense
o Recipients potentially have a contract claim against distributors
who are noncompliant with the source code requirement
o There is a built-in inbound=outbound policy for upstream
contributions (cf. Apache License 2.0 section 5)
o There are disincentives to engage in the controversial practice
of copyleft/ proprietary dual-licensing
o In 15 years copyleft expires, which can be advantageous
for legacy code
o There are explicit disincentives to bringing patent infringement
claims accusing the licensed work of infringement (see 10b)
o There is a cure period for licensees who are not compliant
with the license (there is no cure opportunity in GPLv2)
o copyleft-next has a 'built-in or-later' provision
The first driver submission to Linux under this dual strategy was
lib/test_sysctl.c through commit 9308f2f9e7f05 ("test_sysctl: add
dedicated proc sysctl test driver") merged in July 2017. Shortly after
that I also added test_kmod through commit d9c6a72d6fa29 ("kmod: add
test driver to stress test the module loader") in the same month. These
two drivers went in just a few months before the SPDX license practice
kicked in. In 2018 Kuno Woudt went through the process to get SPDX
identifiers for copyleft-next [6] [7]. Although there are SPDX tags
for copyleft-next-0.3.0, we only document use in Linux starting from
copyleft-next-0.3.1 which makes GPLv2 compatibility crystal clear.
This patch will let us update the two Linux selftest drivers in
subsequent patches with their respective SPDX license identifiers and
let us remove repetitive license boiler plate.
[0] https://github.com/copyleft-next/copyleft-next/blob/master/Releases/copylef…
[1] https://lore.kernel.org/lkml/1465929311-13509-1-git-send-email-mcgrof@kerne…
[2] https://lore.kernel.org/lkml/CA+55aFyhxcvD+q7tp+-yrSFDKfR0mOHgyEAe=f_94aKLs…
[3] https://lore.kernel.org/lkml/20170516232702.GL17314@wotan.suse.de/
[4] https://lkml.kernel.org/r/1495234558.7848.122.camel@linux.intel.com
[5] https://lists.fedorahosted.org/archives/list/copyleft-next@lists.fedorahost…
[6] https://spdx.org/licenses/copyleft-next-0.3.0.html
[7] https://spdx.org/licenses/copyleft-next-0.3.1.html
Cc: Goldwyn Rodrigues <rgoldwyn(a)suse.com>
Cc: Kuno Woudt <kuno(a)frob.nl>
Cc: Richard Fontana <fontana(a)sharpeleven.org>
Cc: copyleft-next(a)lists.fedorahosted.org
Cc: Ciaran Farrell <Ciaran.Farrell(a)suse.com>
Cc: Christopher De Nicolo <Christopher.DeNicolo(a)suse.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Thorsten Leemhuis <linux(a)leemhuis.info>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Luis Chamberlain <mcgrof(a)kernel.org>
---
LICENSES/dual/copyleft-next-0.3.1 | 237 ++++++++++++++++++++++++++++++
1 file changed, 237 insertions(+)
create mode 100644 LICENSES/dual/copyleft-next-0.3.1
diff --git a/LICENSES/dual/copyleft-next-0.3.1 b/LICENSES/dual/copyleft-next-0.3.1
new file mode 100644
index 000000000000..086bcb74b478
--- /dev/null
+++ b/LICENSES/dual/copyleft-next-0.3.1
@@ -0,0 +1,237 @@
+Valid-License-Identifier: copyleft-next-0.3.1
+SPDX-URL: https://spdx.org/licenses/copyleft-next-0.3.1
+Usage-Guide:
+ This license can be used in code, it has been found to be GPLv2 compatible
+ by attorneys at Redhat and SUSE, however to air on the side of caution,
+ it's best to only use it together with a GPL2 compatible license using "OR".
+ To use the copyleft-next-0.3.1 license put the following SPDX tag/value
+ pair into a comment according to the placement guidelines in the
+ licensing rules documentation:
+ SPDX-License-Identifier: GPL-2.0 OR copyleft-next-0.3.1
+ SPDX-License-Identifier: GPL-2.0-only OR copyleft-next 0.3.1
+ SPDX-License-Identifier: GPL-2.0+ OR copyleft-next-0.3.1
+ SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1
+License-Text:
+
+=======================================================================
+
+ copyleft-next 0.3.1 ("this License")
+ Release date: 2016-04-29
+
+1. License Grants; No Trademark License
+
+ Subject to the terms of this License, I grant You:
+
+ a) A non-exclusive, worldwide, perpetual, royalty-free, irrevocable
+ copyright license, to reproduce, Distribute, prepare derivative works
+ of, publicly perform and publicly display My Work.
+
+ b) A non-exclusive, worldwide, perpetual, royalty-free, irrevocable
+ patent license under Licensed Patents to make, have made, use, sell,
+ offer for sale, and import Covered Works.
+
+ This License does not grant any rights in My name, trademarks, service
+ marks, or logos.
+
+2. Distribution: General Conditions
+
+ You may Distribute Covered Works, provided that You (i) inform
+ recipients how they can obtain a copy of this License; (ii) satisfy the
+ applicable conditions of sections 3 through 6; and (iii) preserve all
+ Legal Notices contained in My Work (to the extent they remain
+ pertinent). "Legal Notices" means copyright notices, license notices,
+ license texts, and author attributions, but does not include logos,
+ other graphical images, trademarks or trademark legends.
+
+3. Conditions for Distributing Derived Works; Outbound GPL Compatibility
+
+ If You Distribute a Derived Work, You must license the entire Derived
+ Work as a whole under this License, with prominent notice of such
+ licensing. This condition may not be avoided through such means as
+ separate Distribution of portions of the Derived Work.
+
+ If the Derived Work includes material licensed under the GPL, You may
+ instead license the Derived Work under the GPL.
+
+4. Condition Against Further Restrictions; Inbound License Compatibility
+
+ When Distributing a Covered Work, You may not impose further
+ restrictions on the exercise of rights in the Covered Work granted under
+ this License. This condition is not excused merely because such
+ restrictions result from Your compliance with conditions or obligations
+ extrinsic to this License (such as a court order or an agreement with a
+ third party).
+
+ However, You may Distribute a Covered Work incorporating material
+ governed by a license that is both OSI-Approved and FSF-Free as of the
+ release date of this License, provided that compliance with such
+ other license would not conflict with any conditions stated in other
+ sections of this License.
+
+5. Conditions for Distributing Object Code
+
+ You may Distribute an Object Code form of a Covered Work, provided that
+ you accompany the Object Code with a URL through which the Corresponding
+ Source is made available, at no charge, by some standard or customary
+ means of providing network access to source code.
+
+ If you Distribute the Object Code in a physical product or tangible
+ storage medium ("Product"), the Corresponding Source must be available
+ through such URL for two years from the date of Your most recent
+ Distribution of the Object Code in the Product. However, if the Product
+ itself contains or is accompanied by the Corresponding Source (made
+ available in a customarily accessible manner), You need not also comply
+ with the first paragraph of this section.
+
+ Each direct and indirect recipient of the Covered Work from You is an
+ intended third-party beneficiary of this License solely as to this
+ section 5, with the right to enforce its terms.
+
+6. Symmetrical Licensing Condition for Upstream Contributions
+
+ If You Distribute a work to Me specifically for inclusion in or
+ modification of a Covered Work (a "Patch"), and no explicit licensing
+ terms apply to the Patch, You license the Patch under this License, to
+ the extent of Your copyright in the Patch. This condition does not
+ negate the other conditions of this License, if applicable to the Patch.
+
+7. Nullification of Copyleft/Proprietary Dual Licensing
+
+ If I offer to license, for a fee, a Covered Work under terms other than
+ a license that is OSI-Approved or FSF-Free as of the release date of this
+ License or a numbered version of copyleft-next released by the
+ Copyleft-Next Project, then the license I grant You under section 1 is no
+ longer subject to the conditions in sections 3 through 5.
+
+8. Copyleft Sunset
+
+ The conditions in sections 3 through 5 no longer apply once fifteen
+ years have elapsed from the date of My first Distribution of My Work
+ under this License.
+
+9. Pass-Through
+
+ When You Distribute a Covered Work, the recipient automatically receives
+ a license to My Work from Me, subject to the terms of this License.
+
+10. Termination
+
+ Your license grants under section 1 are automatically terminated if You
+
+ a) fail to comply with the conditions of this License, unless You cure
+ such noncompliance within thirty days after becoming aware of it, or
+
+ b) initiate a patent infringement litigation claim (excluding
+ declaratory judgment actions, counterclaims, and cross-claims)
+ alleging that any part of My Work directly or indirectly infringes
+ any patent.
+
+ Termination of Your license grants extends to all copies of Covered
+ Works You subsequently obtain. Termination does not terminate the
+ rights of those who have received copies or rights from You subject to
+ this License.
+
+ To the extent permission to make copies of a Covered Work is necessary
+ merely for running it, such permission is not terminable.
+
+11. Later License Versions
+
+ The Copyleft-Next Project may release new versions of copyleft-next,
+ designated by a distinguishing version number ("Later Versions").
+ Unless I explicitly remove the option of Distributing Covered Works
+ under Later Versions, You may Distribute Covered Works under any Later
+ Version.
+
+** 12. No Warranty **
+** **
+** My Work is provided "as-is", without warranty. You bear the risk **
+** of using it. To the extent permitted by applicable law, each **
+** Distributor of My Work excludes the implied warranties of title, **
+** merchantability, fitness for a particular purpose and **
+** non-infringement. **
+
+** 13. Limitation of Liability **
+** **
+** To the extent permitted by applicable law, in no event will any **
+** Distributor of My Work be liable to You for any damages **
+** whatsoever, whether direct, indirect, special, incidental, or **
+** consequential damages, whether arising under contract, tort **
+** (including negligence), or otherwise, even where the Distributor **
+** knew or should have known about the possibility of such damages. **
+
+14. Severability
+
+ The invalidity or unenforceability of any provision of this License
+ does not affect the validity or enforceability of the remainder of
+ this License. Such provision is to be reformed to the minimum extent
+ necessary to make it valid and enforceable.
+
+15. Definitions
+
+ "Copyleft-Next Project" means the project that maintains the source
+ code repository at <https://github.com/copyleft-next/copyleft-next.git/>
+ as of the release date of this License.
+
+ "Corresponding Source" of a Covered Work in Object Code form means (i)
+ the Source Code form of the Covered Work; (ii) all scripts,
+ instructions and similar information that are reasonably necessary for
+ a skilled developer to generate such Object Code from the Source Code
+ provided under (i); and (iii) a list clearly identifying all Separate
+ Works (other than those provided in compliance with (ii)) that were
+ specifically used in building and (if applicable) installing the
+ Covered Work (for example, a specified proprietary compiler including
+ its version number). Corresponding Source must be machine-readable.
+
+ "Covered Work" means My Work or a Derived Work.
+
+ "Derived Work" means a work of authorship that copies from, modifies,
+ adapts, is based on, is a derivative work of, transforms, translates or
+ contains all or part of My Work, such that copyright permission is
+ required. The following are not Derived Works: (i) Mere Aggregation;
+ (ii) a mere reproduction of My Work; and (iii) if My Work fails to
+ explicitly state an expectation otherwise, a work that merely makes
+ reference to My Work.
+
+ "Distribute" means to distribute, transfer or make a copy available to
+ someone else, such that copyright permission is required.
+
+ "Distributor" means Me and anyone else who Distributes a Covered Work.
+
+ "FSF-Free" means classified as 'free' by the Free Software Foundation.
+
+ "GPL" means a version of the GNU General Public License or the GNU
+ Affero General Public License.
+
+ "I"/"Me"/"My" refers to the individual or legal entity that places My
+ Work under this License. "You"/"Your" refers to the individual or legal
+ entity exercising rights in My Work under this License. A legal entity
+ includes each entity that controls, is controlled by, or is under
+ common control with such legal entity. "Control" means (a) the power to
+ direct the actions of such legal entity, whether by contract or
+ otherwise, or (b) ownership of more than fifty percent of the
+ outstanding shares or beneficial ownership of such legal entity.
+
+ "Licensed Patents" means all patent claims licensable royalty-free by
+ Me, now or in the future, that are necessarily infringed by making,
+ using, or selling My Work, and excludes claims that would be infringed
+ only as a consequence of further modification of My Work.
+
+ "Mere Aggregation" means an aggregation of a Covered Work with a
+ Separate Work.
+
+ "My Work" means the particular work of authorship I license to You
+ under this License.
+
+ "Object Code" means any form of a work that is not Source Code.
+
+ "OSI-Approved" means approved as 'Open Source' by the Open Source
+ Initiative.
+
+ "Separate Work" means a work that is separate from and independent of a
+ particular Covered Work and is not by its nature an extension or
+ enhancement of the Covered Work, and/or a runtime library, standard
+ library or similar component that is used to generate an Object Code
+ form of a Covered Work.
+
+ "Source Code" means the preferred form of a work for making
+ modifications to it.
--
2.30.2
udmabuf has the following implicit declaration warns:
udmabuf.c:30:10: warning: implicit declaration of function 'open';
udmabuf.c:42:8: warning: implicit declaration of function 'fcntl'
These are caused due to not including fcntl.h and including just
linux/fcntl.h. Fix it to include fcntl.h which will bring in the
linux/fcntl.h. In addition, define __EXPORTED_HEADERS__ to bring in
F_ADD_SEALS and F_SEAL_SHRINK defines and fix the following error
that show up when just fcntl.h is included.
udmabuf.c:45:21: error: 'F_ADD_SEALS' undeclared
45 | ret = fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK);
| ^~~~~~~~~~~
udmabuf.c:45:34: error: 'F_SEAL_SHRINK' undeclared
45 | ret = fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK);
| ^~~~~~~~~~~~~
Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org>
---
tools/testing/selftests/drivers/dma-buf/udmabuf.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
index 4de902ea14d8..de1c4e6de0b2 100644
--- a/tools/testing/selftests/drivers/dma-buf/udmabuf.c
+++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
@@ -1,10 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
-#include <linux/fcntl.h>
+#include <fcntl.h>
#include <malloc.h>
#include <sys/ioctl.h>
--
2.30.2
This series fixes up a few issues introduced into vec-syscfg during
refactoring in the review process, then adds a new test which ensures
that the behaviour when we attempt to set a vector length which is not
supported by the current system matches what is documented in the SVE
ABI documentation.
Mark Brown (4):
selftests: arm64: Fix printf() format mismatch in vec-syscfg
selftests: arm64: Remove bogus error check on writing to files
selftests: arm64: Fix and enable test for setting current VL in
vec-syscfg
selftests: arm64: Verify that all possible vector lengths are handled
tools/testing/selftests/arm64/fp/vec-syscfg.c | 94 ++++++++++++++++---
1 file changed, 81 insertions(+), 13 deletions(-)
base-commit: 6880fa6c56601bb8ed59df6c30fd390cc5f6dd8f
--
2.20.1
Hi Everybody,
This series consists out of outstanding SGX selftests changes, rebased
and gathered in a single series that is more easily merged for testing
and development, and a few more changes added to expand the existing tests.
The outstanding SGX selftest changes included in this series that have already
been submitted separately are:
* An almost two year old patch fixing a benign linker warning that is still
present today:
https://lore.kernel.org/linux-sgx/20191017030340.18301-2-sean.j.christopher…
The original patch is added intact and not all email addresses
within are valid.
* Latest (v4) of Jarkko Sakkinen's series to add an oversubscription test:
https://lore.kernel.org/linux-sgx/20210809093127.76264-1-jarkko@kernel.org/
* Latest (v2) of Jarkko Sakkinen's patch that provides provide per-op
parameter structs for the test enclave:
https://lore.kernel.org/linux-sgx/20210812224645.90280-1-jarkko@kernel.org/
The reason why most of these patches are outstanding is that they depend
on a kernel change that is still under discussion. Decision to wait in:
https://lore.kernel.org/linux-sgx/f8674dac5579a8a424de1565f7ffa2b5bf2f8e36.…
The original patch for this kernel dependency continues to be included in
this series as a placeholder until the ongoing discussions are concluded.
The new changes introduced in this series builds on Jarkko's outstanding
SGX selftest changes and adds new tests for page permissions, exception
handling, and thread entry.
Reinette
Jarkko Sakkinen (9):
x86/sgx: Add /sys/kernel/debug/x86/sgx_total_mem
selftests/sgx: Assign source for each segment
selftests/sgx: Make data measurement for an enclave segment optional
selftests/sgx: Create a heap for the test enclave
selftests/sgx: Dump segments and /proc/self/maps only on failure
selftests/sgx: Encpsulate the test enclave creation
selftests/sgx: Move setup_test_encl() to each TEST_F()
selftests/sgx: Add a new kselftest: unclobbered_vdso_oversubscribed
selftests/sgx: Provide per-op parameter structs for the test enclave
Reinette Chatre (4):
selftests/sgx: Rename test properties in preparation for more enclave
tests
selftests/sgx: Add page permission and exception test
selftests/sgx: Enable multiple thread support
selftests/sgx: Add test for multiple TCS entry
Sean Christopherson (1):
selftests/x86/sgx: Fix a benign linker warning
Documentation/x86/sgx.rst | 6 +
arch/x86/kernel/cpu/sgx/main.c | 10 +-
tools/testing/selftests/sgx/Makefile | 2 +-
tools/testing/selftests/sgx/defines.h | 33 +-
tools/testing/selftests/sgx/load.c | 40 +-
tools/testing/selftests/sgx/main.c | 341 +++++++++++++++---
tools/testing/selftests/sgx/main.h | 7 +-
tools/testing/selftests/sgx/sigstruct.c | 12 +-
tools/testing/selftests/sgx/test_encl.c | 60 ++-
.../selftests/sgx/test_encl_bootstrap.S | 21 +-
10 files changed, 445 insertions(+), 87 deletions(-)
--
2.25.1
From: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
commit 210f9df02611cbe641ced3239122b270fd907d86 upstream.
The selftest for ftrace checks some features by checking if the README has
text that states the feature is supported by that kernel. Unfortunately,
this check gives false positives because it many not be checked if there's
spaces in the string to check. This is due to the compare between the
required variable with the ":README" string stripped, because neither has
quotes around them.
Link: https://lkml.kernel.org/r/20210820204742.087177341@goodmis.org
Cc: "Tzvetomir Stoyanov" <tz.stoyanov(a)gmail.com>
Cc: Tom Zanussi <zanussi(a)kernel.org>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Shuah Khan <skhan(a)linuxfoundation.org>
Cc: linux-kselftest(a)vger.kernel.org
Cc: stable(a)vger.kernel.org
Fixes: 1b8eec510ba64 ("selftests/ftrace: Support ":README" suffix for requires")
Acked-by: Masami Hiramatsu <mhiramat(a)kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
tools/testing/selftests/ftrace/test.d/functions | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -115,7 +115,7 @@ check_requires() { # Check required file
echo "Required tracer $t is not configured."
exit_unsupported
fi
- elif [ $r != $i ]; then
+ elif [ "$r" != "$i" ]; then
if ! grep -Fq "$r" README ; then
echo "Required feature pattern \"$r\" is not in README."
exit_unsupported
From: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
commit 210f9df02611cbe641ced3239122b270fd907d86 upstream.
The selftest for ftrace checks some features by checking if the README has
text that states the feature is supported by that kernel. Unfortunately,
this check gives false positives because it many not be checked if there's
spaces in the string to check. This is due to the compare between the
required variable with the ":README" string stripped, because neither has
quotes around them.
Link: https://lkml.kernel.org/r/20210820204742.087177341@goodmis.org
Cc: "Tzvetomir Stoyanov" <tz.stoyanov(a)gmail.com>
Cc: Tom Zanussi <zanussi(a)kernel.org>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Shuah Khan <skhan(a)linuxfoundation.org>
Cc: linux-kselftest(a)vger.kernel.org
Cc: stable(a)vger.kernel.org
Fixes: 1b8eec510ba64 ("selftests/ftrace: Support ":README" suffix for requires")
Acked-by: Masami Hiramatsu <mhiramat(a)kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
tools/testing/selftests/ftrace/test.d/functions | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -115,7 +115,7 @@ check_requires() { # Check required file
echo "Required tracer $t is not configured."
exit_unsupported
fi
- elif [ $r != $i ]; then
+ elif [ "$r" != "$i" ]; then
if ! grep -Fq "$r" README ; then
echo "Required feature pattern \"$r\" is not in README."
exit_unsupported
From: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
commit 210f9df02611cbe641ced3239122b270fd907d86 upstream.
The selftest for ftrace checks some features by checking if the README has
text that states the feature is supported by that kernel. Unfortunately,
this check gives false positives because it many not be checked if there's
spaces in the string to check. This is due to the compare between the
required variable with the ":README" string stripped, because neither has
quotes around them.
Link: https://lkml.kernel.org/r/20210820204742.087177341@goodmis.org
Cc: "Tzvetomir Stoyanov" <tz.stoyanov(a)gmail.com>
Cc: Tom Zanussi <zanussi(a)kernel.org>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Shuah Khan <skhan(a)linuxfoundation.org>
Cc: linux-kselftest(a)vger.kernel.org
Cc: stable(a)vger.kernel.org
Fixes: 1b8eec510ba64 ("selftests/ftrace: Support ":README" suffix for requires")
Acked-by: Masami Hiramatsu <mhiramat(a)kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
tools/testing/selftests/ftrace/test.d/functions | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -115,7 +115,7 @@ check_requires() { # Check required file
echo "Required tracer $t is not configured."
exit_unsupported
fi
- elif [ $r != $i ]; then
+ elif [ "$r" != "$i" ]; then
if ! grep -Fq "$r" README ; then
echo "Required feature pattern \"$r\" is not in README."
exit_unsupported
The XSAVE feature set supports the saving and restoring of state components,
which is used for process context switching. The state components include
x87 state for FPU execution environment, SSE state, AVX state and so on. In
order to ensure that XSAVE works correctly, add XSAVE basic test for
XSAVE architecture functionality.
This patch set tests and verifies the basic functions of XSAVE/XRSTOR in
user space; during and after signal processing on the x86 platform, the
XSAVE contents of the process should not be changed.
This series introduces only the most basic XSAVE tests. In the
future, the intention is to continue expanding the scope of
these selftests to include more kernel XSAVE-related functionality
and XSAVE-managed features like AMX and shadow stacks.
========
- Change from v3 to v4:
- Improve the comment in patch 1.
- Change from v2 to v3:
- Improve the description of patch 2 git log.
- Change from v1 to v2:
- Improve the cover-letter. (Dave Hansen)
Pengfei Xu (2):
selftests/xsave: test basic XSAVE architecture functionality
selftests/xsave: add xsave test during and after signal handling
tools/testing/selftests/Makefile | 1 +
tools/testing/selftests/xsave/.gitignore | 3 +
tools/testing/selftests/xsave/Makefile | 6 +
tools/testing/selftests/xsave/xsave_common.h | 246 ++++++++++++++++++
.../selftests/xsave/xsave_instruction.c | 83 ++++++
.../selftests/xsave/xsave_signal_handle.c | 184 +++++++++++++
6 files changed, 523 insertions(+)
create mode 100644 tools/testing/selftests/xsave/.gitignore
create mode 100644 tools/testing/selftests/xsave/Makefile
create mode 100644 tools/testing/selftests/xsave/xsave_common.h
create mode 100644 tools/testing/selftests/xsave/xsave_instruction.c
create mode 100644 tools/testing/selftests/xsave/xsave_signal_handle.c
--
2.20.1
LKP/0Day reported some building errors about kvm, and errors message
are not always same:
- lib/x86_64/processor.c:1083:31: error: ‘KVM_CAP_NESTED_STATE’ undeclared
(first use in this function); did you mean ‘KVM_CAP_PIT_STATE2’?
- lib/test_util.c:189:30: error: ‘MAP_HUGE_16KB’ undeclared (first use
in this function); did you mean ‘MAP_HUGE_16GB’?
Although kvm relies on the khdr, they still be built in parallel when -j
is specified. In this case, it will cause compiling errors.
Here we mark target khdr as NOTPARALLEL to make it be always built
first.
CC: Philip Li <philip.li(a)intel.com>
Reported-by: kernel test robot <lkp(a)intel.com>
Signed-off-by: Li Zhijian <lizhijian(a)cn.fujitsu.com>
---
tools/testing/selftests/lib.mk | 1 +
1 file changed, 1 insertion(+)
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 7ee911355328..5074b01f2a29 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -48,6 +48,7 @@ ARCH ?= $(SUBARCH)
# When local build is done, headers are installed in the default
# INSTALL_HDR_PATH usr/include.
.PHONY: khdr
+.NOTPARALLEL:
khdr:
ifndef KSFT_KHDR_INSTALL_DONE
ifeq (1,$(DEFAULT_INSTALL_HDR_PATH))
--
2.33.0
Introduction
============
This patch set depends on:
- support for the euid policy keyword for critical data
(https://lore.kernel.org/linux-integrity/20210705115650.3373599-1-roberto.sa…)
- basic DIGLIM
(https://lore.kernel.org/linux-integrity/20210914163401.864635-1-roberto.sas…)
Introduce the remaining features necessary to upload to the kernel
reference values from RPM headers or digest lists in other formats.
Loader: it will automatically uploads digest lists from a directory
specified in the kernel configuration and will execute a user space
uploader to upload digest lists in a format that is not recognized
by the kernel;
LSM: it identifies digest list parsers and monitor their activity for
integrity evaluation; it protects digest list parsers from other user
space processes considered as untrusted;
Digest list generators: user space tools to generate digest lists from
files (in the compact format) or from the RPM DB;
Digest list uploader and parsers: user space tools responsible to upload to
the kernel digest lists not in the
compact format (e.g. those derived from
the RPM DB);
Administration guide: it describes the steps necessary to upload to the
kernel all the digests of an RPM-based Linux
distribution, using a custom kernel with the DIGLIM
patches applied.
With these changes, DIGLIM is ready to be used by IMA for measurement and
appraisal (this functionality will be added with a future patch set).
DIGLIM already supports appended signatures, but at the moment they cannot
be interpreted by IMA (unsupported ID PKEY_ID_PGP). Another patch set is
necessary to load the PGP keys from the Linux distribution to the system
keyring and to verify the PGP signatures of the RPM headers.
With the patch sets above and the execution policies for IMA proposed some
time ago, it will be possible to generate a measurement list with digest
lists and unknown files, and enable IMA appraisal in enforcing mode.
The kernel command line would be:
ima_template=ima-modsig ima_policy="exec_tcb|tmpfs|digest_lists|appraise_exec_tcb|appraise_tmpfs|appraise_digest_lists"
The effort required for Linux distribution vendors will be to generate and
sign the digest lists for the digest list uploader and the RPM parser. This
could be done for example in the kernel-tools package (or in a separate
package). Existing package signatures are sufficient for remaining files.
Issues/Questions
================
Lockdep (patch 2/9)
-------------------
I'm using iterate_dir() and file_open_root() to iterate and open files
in a directory. Unfortunately, I get the following warning:
============================================
WARNING: possible recursive locking detected
5.15.0-rc1-dont-use-00049-ga5a881519991 #134 Not tainted
--------------------------------------------
swapper/1 is trying to acquire lock:
0000000066812898 (&sb->s_type->i_mutex_key#7){++++}-{4:4}, at: path_openat+0x75d/0xd20
but task is already holding lock:
0000000066812898 (&sb->s_type->i_mutex_key#7){++++}-{4:4}, at: iterate_dir+0x65/0x250
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(&sb->s_type->i_mutex_key#7);
lock(&sb->s_type->i_mutex_key#7);
*** DEADLOCK ***
due to the fact that path_openat() might be trying to lock the directory
already locked by iterate_dir(). What it would be a good way to avoid it?
Inode availability in security_file_free() (patch 3/9)
------------------------------------------------------
It seems that this hook is called when the last reference to a file is
released. After enabling debugging, sometimes the kernel reported that the
inode I was trying to access was already freed.
To avoid this situation, I'm grabbing an additional reference of the inode
in the security_file_open() hook, to ensure that the inode does not
disappear, and I'm releasing it in the security_file_free() hook. Is this
solution acceptable?
Roberto Sassu (9):
ima: Introduce new hook DIGEST_LIST_CHECK
diglim: Loader
diglim: LSM
diglim: Tests - LSM
diglim: Compact digest list generator
diglim: RPM digest list generator
diglim: Digest list uploader
diglim: RPM parser
diglim: Admin guide
Documentation/admin-guide/diglim.rst | 136 +++++
Documentation/admin-guide/index.rst | 1 +
.../security/diglim/implementation.rst | 16 +
Documentation/security/diglim/index.rst | 1 +
Documentation/security/diglim/lsm.rst | 65 +++
Documentation/security/diglim/tests.rst | 18 +-
MAINTAINERS | 10 +
security/integrity/diglim/Kconfig | 14 +
security/integrity/diglim/Makefile | 2 +-
security/integrity/diglim/diglim.h | 27 +
security/integrity/diglim/fs.c | 3 +
security/integrity/diglim/hooks.c | 436 ++++++++++++++++
security/integrity/diglim/loader.c | 92 ++++
security/integrity/iint.c | 1 +
security/integrity/ima/ima.h | 1 +
security/integrity/ima/ima_main.c | 3 +-
security/integrity/ima/ima_policy.c | 3 +
security/integrity/integrity.h | 8 +
tools/diglim/Makefile | 27 +
tools/diglim/common.c | 79 +++
tools/diglim/common.h | 59 +++
tools/diglim/compact_gen.c | 349 +++++++++++++
tools/diglim/rpm_gen.c | 334 ++++++++++++
tools/diglim/rpm_parser.c | 483 ++++++++++++++++++
tools/diglim/upload_digest_lists.c | 238 +++++++++
tools/testing/selftests/diglim/Makefile | 12 +-
tools/testing/selftests/diglim/common.h | 9 +
tools/testing/selftests/diglim/selftest.c | 357 ++++++++++++-
28 files changed, 2764 insertions(+), 20 deletions(-)
create mode 100644 Documentation/admin-guide/diglim.rst
create mode 100644 Documentation/security/diglim/lsm.rst
create mode 100644 security/integrity/diglim/hooks.c
create mode 100644 security/integrity/diglim/loader.c
create mode 100644 tools/diglim/Makefile
create mode 100644 tools/diglim/common.c
create mode 100644 tools/diglim/common.h
create mode 100644 tools/diglim/compact_gen.c
create mode 100644 tools/diglim/rpm_gen.c
create mode 100644 tools/diglim/rpm_parser.c
create mode 100644 tools/diglim/upload_digest_lists.c
--
2.25.1