In rseq_test, there are two threads, which are vCPU thread and migration
worker separately. Unfortunately, the test has the wrong PID passed to
sched_setaffinity() in the migration worker. It forces migration on the
migration worker because zeroed PID represents the calling thread, which
is the migration worker itself. It means the vCPU thread is never enforced
to migration and it can migrate at any time, which eventually leads to
failure as the following logs show.
host# uname -r
5.19.0-rc6-gavin+
host# # cat /proc/cpuinfo | grep processor | tail -n 1
processor : 223
host# pwd
/home/gavin/sandbox/linux.main/tools/testing/selftests/kvm
host# for i in `seq 1 100`; do \
echo "--------> $i"; ./rseq_test; done
--------> 1
--------> 2
--------> 3
--------> 4
--------> 5
--------> 6
==== Test Assertion Failure ====
rseq_test.c:265: rseq_cpu == cpu
pid=3925 tid=3925 errno=4 - Interrupted system call
1 0x0000000000401963: main at rseq_test.c:265 (discriminator 2)
2 0x0000ffffb044affb: ?? ??:0
3 0x0000ffffb044b0c7: ?? ??:0
4 0x0000000000401a6f: _start at ??:?
rseq CPU = 4, sched CPU = 27
Fix the issue by passing correct parameter, TID of the vCPU thread, to
sched_setaffinity() in the migration worker.
Fixes: 61e52f1630f5 ("KVM: selftests: Add a test for KVM_RUN+rseq to detect task migration bugs")
Signed-off-by: Gavin Shan <gshan(a)redhat.com>
Reviewed-by: Oliver Upton <oliver.upton(a)linux.dev>
---
v3: Improved changelog (Oliver Upon)
---
tools/testing/selftests/kvm/rseq_test.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index 4158da0da2bb..c83ac7b467f8 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -38,6 +38,7 @@ static __thread volatile struct rseq __rseq = {
*/
#define NR_TASK_MIGRATIONS 100000
+static pid_t rseq_tid;
static pthread_t migration_thread;
static cpu_set_t possible_mask;
static int min_cpu, max_cpu;
@@ -106,7 +107,8 @@ static void *migration_worker(void *ign)
* stable, i.e. while changing affinity is in-progress.
*/
smp_wmb();
- r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask);
+ r = sched_setaffinity(rseq_tid, sizeof(allowed_mask),
+ &allowed_mask);
TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
errno, strerror(errno));
smp_wmb();
@@ -231,6 +233,7 @@ int main(int argc, char *argv[])
vm = vm_create_default(VCPU_ID, 0, guest_code);
ucall_init(vm, NULL);
+ rseq_tid = gettid();
pthread_create(&migration_thread, NULL, migration_worker, 0);
for (i = 0; !done; i++) {
--
2.23.0
In rseq_test, there are two threads, which are thread group leader
and migration worker. The migration worker relies on sched_setaffinity()
to force migration on the thread group leader. Unfortunately, we
have wrong parameter (0) passed to sched_getaffinity(). It's actually
forcing migration on the migration worker instead of the thread group
leader. It also means migration can happen on the thread group leader
at any time, which eventually leads to failure as the following logs
show.
host# uname -r
5.19.0-rc6-gavin+
host# # cat /proc/cpuinfo | grep processor | tail -n 1
processor : 223
host# pwd
/home/gavin/sandbox/linux.main/tools/testing/selftests/kvm
host# for i in `seq 1 100`; \
do echo "--------> $i"; ./rseq_test; done
--------> 1
--------> 2
--------> 3
--------> 4
--------> 5
--------> 6
==== Test Assertion Failure ====
rseq_test.c:265: rseq_cpu == cpu
pid=3925 tid=3925 errno=4 - Interrupted system call
1 0x0000000000401963: main at rseq_test.c:265 (discriminator 2)
2 0x0000ffffb044affb: ?? ??:0
3 0x0000ffffb044b0c7: ?? ??:0
4 0x0000000000401a6f: _start at ??:?
rseq CPU = 4, sched CPU = 27
This fixes the issue by passing correct parameter, tid of the group
thread leader, to sched_setaffinity().
Fixes: 61e52f1630f5 ("KVM: selftests: Add a test for KVM_RUN+rseq to detect task migration bugs")
Signed-off-by: Gavin Shan <gshan(a)redhat.com>
---
tools/testing/selftests/kvm/rseq_test.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index 4158da0da2bb..c83ac7b467f8 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -38,6 +38,7 @@ static __thread volatile struct rseq __rseq = {
*/
#define NR_TASK_MIGRATIONS 100000
+static pid_t rseq_tid;
static pthread_t migration_thread;
static cpu_set_t possible_mask;
static int min_cpu, max_cpu;
@@ -106,7 +107,8 @@ static void *migration_worker(void *ign)
* stable, i.e. while changing affinity is in-progress.
*/
smp_wmb();
- r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask);
+ r = sched_setaffinity(rseq_tid, sizeof(allowed_mask),
+ &allowed_mask);
TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
errno, strerror(errno));
smp_wmb();
@@ -231,6 +233,7 @@ int main(int argc, char *argv[])
vm = vm_create_default(VCPU_ID, 0, guest_code);
ucall_init(vm, NULL);
+ rseq_tid = gettid();
pthread_create(&migration_thread, NULL, migration_worker, 0);
for (i = 0; !done; i++) {
--
2.23.0
Hi Dear,
My name is Dr Lily William from the United States.I am a French and
American nationality (dual) living in the U.S and sometimes in France
for Work Purpose.
I hope you consider my friend request. I will share some of my pics
and more details about myself when I get your response.
Thanks
With love
Lily
On 7/18/22 04:02, Stephen Rothwell wrote:
> Hi all,
>
> Changes since 20220715:
>
on x86_64:
vmlinux.o: in function `ne_misc_dev_test_merge_phys_contig_memory_regions':
ne_misc_dev.c:(.text+0x88eae7): undefined reference to `kunit_kmalloc_array'
ld: ne_misc_dev.c:(.text+0x88eafd): undefined reference to `kunit_unary_assert_format'
ld: ne_misc_dev.c:(.text+0x88eb25): undefined reference to `kunit_do_failed_assertion'
ld: ne_misc_dev.c:(.text+0x88ec0c): undefined reference to `kunit_binary_assert_format'
ld: ne_misc_dev.c:(.text+0x88ec3c): undefined reference to `kunit_do_failed_assertion'
ld: ne_misc_dev.c:(.text+0x88ec58): undefined reference to `kunit_binary_assert_format'
ld: ne_misc_dev.c:(.text+0x88ec88): undefined reference to `kunit_do_failed_assertion'
ld: ne_misc_dev.c:(.text+0x88ecc2): undefined reference to `kunit_binary_assert_format'
ld: ne_misc_dev.c:(.text+0x88ecf2): undefined reference to `kunit_do_failed_assertion'
ld: ne_misc_dev.c:(.text+0x88ed19): undefined reference to `kunit_binary_assert_format'
ld: ne_misc_dev.c:(.text+0x88ed49): undefined reference to `kunit_do_failed_assertion'
ld: ne_misc_dev.c:(.text+0x88ed7e): undefined reference to `kunit_kfree'
Full randconfig file is attached.
--
~Randy
On Sat, Jul 16, 2022 at 09:14:08AM +0800, li_jessen2016(a)gmail.com li wrote:
> Thanks for your kind reply. Then what should I do? To officially raise a
> bug to all the relevant persons in the kernel community?
Yeah, I'd figure out who works on the script and mail them about it (or
develop a patch if you feel up to it!).
In rseq_test, there are two threads created. Those two threads are
'main' and 'migration_thread' separately. We also have the assumption
that non-migration status on 'migration-worker' thread guarantees the
same non-migration status on 'main' thread. Unfortunately, the assumption
isn't true. The 'main' thread can be migrated from one CPU to another
one between the calls to sched_getcpu() and READ_ONCE(__rseq.cpu_id).
The following assert is raised eventually because of the mismatched
CPU numbers.
The issue can be reproduced on arm64 system occasionally.
host# uname -r
5.19.0-rc6-gavin+
host# # cat /proc/cpuinfo | grep processor | tail -n 1
processor : 223
host# pwd
/home/gavin/sandbox/linux.main/tools/testing/selftests/kvm
host# for i in `seq 1 100`; \
do echo "--------> $i"; \
./rseq_test; sleep 3; \
done
--------> 1
--------> 2
--------> 3
--------> 4
--------> 5
--------> 6
==== Test Assertion Failure ====
rseq_test.c:265: rseq_cpu == cpu
pid=3925 tid=3925 errno=4 - Interrupted system call
1 0x0000000000401963: main at rseq_test.c:265 (discriminator 2)
2 0x0000ffffb044affb: ?? ??:0
3 0x0000ffffb044b0c7: ?? ??:0
4 0x0000000000401a6f: _start at ??:?
rseq CPU = 4, sched CPU = 27
This fixes the issue by double-checking on the current CPU after
call to READ_ONCE(__rseq.cpu_id) and restarting the test if the
two consecutive CPU numbers aren't euqal.
Fixes: 61e52f1630f5 ("KVM: selftests: Add a test for KVM_RUN+rseq to detect task migration bugs")
Signed-off-by: Gavin Shan <gshan(a)redhat.com>
---
tools/testing/selftests/kvm/rseq_test.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index 4158da0da2bb..74709dd9f5b2 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -207,7 +207,7 @@ int main(int argc, char *argv[])
{
int r, i, snapshot;
struct kvm_vm *vm;
- u32 cpu, rseq_cpu;
+ u32 cpu, rseq_cpu, last_cpu;
/* Tell stdout not to buffer its content */
setbuf(stdout, NULL);
@@ -259,8 +259,9 @@ int main(int argc, char *argv[])
smp_rmb();
cpu = sched_getcpu();
rseq_cpu = READ_ONCE(__rseq.cpu_id);
+ last_cpu = sched_getcpu();
smp_rmb();
- } while (snapshot != atomic_read(&seq_cnt));
+ } while (snapshot != atomic_read(&seq_cnt) || cpu != last_cpu);
TEST_ASSERT(rseq_cpu == cpu,
"rseq CPU = %d, sched CPU = %d\n", rseq_cpu, cpu);
--
2.23.0