Implement the ucall() interface on s390x to be able to use the dirty_log_test KVM selftest on s390x, too.
Thomas Huth (2): KVM: selftests: Implement ucall() for s390x KVM: selftests: Enable dirty_log_test on s390x
tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/dirty_log_test.c | 70 +++++++++++++++++-- .../testing/selftests/kvm/include/kvm_util.h | 2 +- tools/testing/selftests/kvm/lib/ucall.c | 34 +++++++-- .../selftests/kvm/s390x/sync_regs_test.c | 6 +- 5 files changed, 98 insertions(+), 15 deletions(-)
On s390x, we can neither exit via PIO nor MMIO, but have to use an instruction like DIAGNOSE. While we're at it, rename UCALL_PIO to UCALL_DEFAULT, since PIO only works on x86 anyway, and this way we can re-use the "default" type for the DIAGNOSE exit on s390x.
Now that ucall() is implemented, we can use it in the sync_regs_test on s390x, too.
Signed-off-by: Thomas Huth thuth@redhat.com --- .../testing/selftests/kvm/include/kvm_util.h | 2 +- tools/testing/selftests/kvm/lib/ucall.c | 34 +++++++++++++++---- .../selftests/kvm/s390x/sync_regs_test.c | 6 ++-- 3 files changed, 32 insertions(+), 10 deletions(-)
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index e0e66b115ef2..c37aea2e33e5 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -167,7 +167,7 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
/* ucall implementation types */ typedef enum { - UCALL_PIO, + UCALL_DEFAULT, UCALL_MMIO, } ucall_type_t;
diff --git a/tools/testing/selftests/kvm/lib/ucall.c b/tools/testing/selftests/kvm/lib/ucall.c index dd9a66700f96..55534dd014dc 100644 --- a/tools/testing/selftests/kvm/lib/ucall.c +++ b/tools/testing/selftests/kvm/lib/ucall.c @@ -30,7 +30,7 @@ void ucall_init(struct kvm_vm *vm, ucall_type_t type, void *arg) ucall_type = type; sync_global_to_guest(vm, ucall_type);
- if (type == UCALL_PIO) + if (type == UCALL_DEFAULT) return;
if (type == UCALL_MMIO) { @@ -84,11 +84,18 @@ void ucall_uninit(struct kvm_vm *vm) sync_global_to_guest(vm, ucall_exit_mmio_addr); }
-static void ucall_pio_exit(struct ucall *uc) +static void ucall_default_exit(struct ucall *uc) { -#ifdef __x86_64__ +#if defined(__x86_64__) + /* Exit via PIO */ asm volatile("in %[port], %%al" : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax"); +#elif defined(__s390x__) + /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */ + asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory"); +#else + fprintf(stderr, "No default ucall available on this architecture.\n"); + exit(1); #endif }
@@ -113,8 +120,8 @@ void ucall(uint64_t cmd, int nargs, ...) va_end(va);
switch (ucall_type) { - case UCALL_PIO: - ucall_pio_exit(&uc); + case UCALL_DEFAULT: + ucall_default_exit(&uc); break; case UCALL_MMIO: ucall_mmio_exit(&uc); @@ -128,15 +135,28 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) struct ucall ucall = {}; bool got_ucall = false;
-#ifdef __x86_64__ - if (ucall_type == UCALL_PIO && run->exit_reason == KVM_EXIT_IO && +#if defined(__x86_64__) + if (ucall_type == UCALL_DEFAULT && run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { struct kvm_regs regs; vcpu_regs_get(vm, vcpu_id, &regs); memcpy(&ucall, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi), sizeof(ucall)); got_ucall = true; } +#elif defined(__s390x__) + if (ucall_type == UCALL_DEFAULT && + run->exit_reason == KVM_EXIT_S390_SIEIC && + run->s390_sieic.icptcode == 4 && + (run->s390_sieic.ipa >> 8) == 0x83 && /* 0x83 means DIAGNOSE */ + (run->s390_sieic.ipb >> 16) == 0x501) { + int reg = run->s390_sieic.ipa & 0xf; + + memcpy(&ucall, addr_gva2hva(vm, run->s.regs.gprs[reg]), + sizeof(ucall)); + got_ucall = true; + } #endif + if (ucall_type == UCALL_MMIO && run->exit_reason == KVM_EXIT_MMIO && run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) { vm_vaddr_t gva; diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c index e85ff0d69548..bbc93094519b 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c @@ -25,9 +25,11 @@
static void guest_code(void) { + register u64 stage asm("11") = 0; + for (;;) { - asm volatile ("diag 0,0,0x501"); - asm volatile ("ahi 11,1"); + GUEST_SYNC(0); + asm volatile ("ahi %0,1" : : "r"(stage)); } }
On Tue, Jul 30, 2019 at 12:01:11PM +0200, Thomas Huth wrote:
On s390x, we can neither exit via PIO nor MMIO, but have to use an instruction like DIAGNOSE. While we're at it, rename UCALL_PIO to UCALL_DEFAULT, since PIO only works on x86 anyway, and this way we can re-use the "default" type for the DIAGNOSE exit on s390x.
Now that ucall() is implemented, we can use it in the sync_reg_test on s390x, too.
Signed-off-by: Thomas Huth thuth@redhat.com
.../testing/selftests/kvm/include/kvm_util.h | 2 +- tools/testing/selftests/kvm/lib/ucall.c | 34 +++++++++++++++---- .../selftests/kvm/s390x/sync_regs_test.c | 6 ++-- 3 files changed, 32 insertions(+), 10 deletions(-)
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index e0e66b115ef2..c37aea2e33e5 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -167,7 +167,7 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); /* ucall implementation types */ typedef enum {
- UCALL_PIO,
- UCALL_DEFAULT,
I'd rather we keep explicit types defined; keep PIO and add DIAG. Then we can have
/* Set default ucall types */ #if defined(__x86_64__) ucall_type = UCALL_PIO; #elif defined(__aarch64__) ucall_type = UCALL_MMIO; ucall_requires_init = true; #elif defined(__s390x__) ucall_type = UCALL_DIAG; #endif
And add an assert in get_ucall()
assert(!ucall_requires_init || ucall_initialized);
UCALL_MMIO, } ucall_type_t; diff --git a/tools/testing/selftests/kvm/lib/ucall.c b/tools/testing/selftests/kvm/lib/ucall.c index dd9a66700f96..55534dd014dc 100644 --- a/tools/testing/selftests/kvm/lib/ucall.c +++ b/tools/testing/selftests/kvm/lib/ucall.c @@ -30,7 +30,7 @@ void ucall_init(struct kvm_vm *vm, ucall_type_t type, void *arg) ucall_type = type; sync_global_to_guest(vm, ucall_type);
- if (type == UCALL_PIO)
- if (type == UCALL_DEFAULT) return;
if (type == UCALL_MMIO) { @@ -84,11 +84,18 @@ void ucall_uninit(struct kvm_vm *vm) sync_global_to_guest(vm, ucall_exit_mmio_addr); } -static void ucall_pio_exit(struct ucall *uc) +static void ucall_default_exit(struct ucall *uc) { -#ifdef __x86_64__ +#if defined(__x86_64__)
- /* Exit via PIO */ asm volatile("in %[port], %%al" : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax");
+#elif defined(__s390x__)
- /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
- asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory");
+#else
- fprintf(stderr, "No default ucall available on this architecture.\n");
- exit(1);
#endif } @@ -113,8 +120,8 @@ void ucall(uint64_t cmd, int nargs, ...) va_end(va); switch (ucall_type) {
- case UCALL_PIO:
- ucall_pio_exit(&uc);
+ case UCALL_DEFAULT:
+ ucall_default_exit(&uc);
break; case UCALL_MMIO: ucall_mmio_exit(&uc);
@@ -128,15 +135,28 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) struct ucall ucall = {}; bool got_ucall = false; -#ifdef __x86_64__
- if (ucall_type == UCALL_PIO && run->exit_reason == KVM_EXIT_IO &&
+#if defined(__x86_64__)
+ if (ucall_type == UCALL_DEFAULT && run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { struct kvm_regs regs; vcpu_regs_get(vm, vcpu_id, &regs); memcpy(&ucall, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi), sizeof(ucall)); got_ucall = true; }
+#elif defined(__s390x__)
- if (ucall_type == UCALL_DEFAULT &&
run->exit_reason == KVM_EXIT_S390_SIEIC &&
run->s390_sieic.icptcode == 4 &&
(run->s390_sieic.ipa >> 8) == 0x83 && /* 0x83 means DIAGNOSE */
(run->s390_sieic.ipb >> 16) == 0x501) {
int reg = run->s390_sieic.ipa & 0xf;
memcpy(&ucall, addr_gva2hva(vm, run->s.regs.gprs[reg]),
sizeof(ucall));
got_ucall = true;
- }
#endif
- if (ucall_type == UCALL_MMIO && run->exit_reason == KVM_EXIT_MMIO && run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) { vm_vaddr_t gva;
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c index e85ff0d69548..bbc93094519b 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c @@ -25,9 +25,11 @@ static void guest_code(void) {
+ register u64 stage asm("11") = 0;
+
for (;;) {
- asm volatile ("diag 0,0,0x501");
- asm volatile ("ahi 11,1");
+ GUEST_SYNC(0);
+ asm volatile ("ahi %0,1" : : "r"(stage));
}
} -- 2.21.0
Thanks, drew
On 30/07/2019 12.48, Andrew Jones wrote:
On Tue, Jul 30, 2019 at 12:01:11PM +0200, Thomas Huth wrote:
On s390x, we can neither exit via PIO nor MMIO, but have to use an instruction like DIAGNOSE. While we're at it, rename UCALL_PIO to UCALL_DEFAULT, since PIO only works on x86 anyway, and this way we can re-use the "default" type for the DIAGNOSE exit on s390x.
Now that ucall() is implemented, we can use it in the sync_reg_test on s390x, too.
Signed-off-by: Thomas Huth thuth@redhat.com
.../testing/selftests/kvm/include/kvm_util.h | 2 +- tools/testing/selftests/kvm/lib/ucall.c | 34 +++++++++++++++---- .../selftests/kvm/s390x/sync_regs_test.c | 6 ++-- 3 files changed, 32 insertions(+), 10 deletions(-)
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index e0e66b115ef2..c37aea2e33e5 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -167,7 +167,7 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); /* ucall implementation types */ typedef enum {
- UCALL_PIO,
- UCALL_DEFAULT,
I'd rather we keep explicit types defined; keep PIO and add DIAG. Then we can have
/* Set default ucall types */ #if defined(__x86_64__) ucall_type = UCALL_PIO; #elif defined(__aarch64__) ucall_type = UCALL_MMIO; ucall_requires_init = true; #elif defined(__s390x__) ucall_type = UCALL_DIAG; #endif
And add an assert in get_ucall()
assert(!ucall_requires_init || ucall_initialized);
I'm not sure whether I really like that. It's yet another additional #ifdef block, and yet another variable ...
What do you think about removing the enum completely and simply code it directly, without the ucall_type indirection, i.e.:
void ucall(uint64_t cmd, int nargs, ...) { struct ucall uc = { .cmd = cmd, }; va_list va; int i;
nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
va_start(va, nargs); for (i = 0; i < nargs; ++i) uc.args[i] = va_arg(va, uint64_t); va_end(va);
#if defined(__x86_64__)
/* Exit via PIO */ asm volatile("in %[port], %%al" : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax");
#elif defined(__aarch64__)
*ucall_exit_mmio_addr = (vm_vaddr_t)&uc;
#elif defined(__s390x__)
/* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */ asm volatile ("diag 0,%0,0x501" : : "a"(&uc) : "memory");
#endif }
I think that's way less confusing than having to understand the meaning of ucall_type etc. before...?
Thomas
On Wed, Jul 31, 2019 at 11:43:16AM +0200, Thomas Huth wrote:
On 30/07/2019 12.48, Andrew Jones wrote:
On Tue, Jul 30, 2019 at 12:01:11PM +0200, Thomas Huth wrote:
On s390x, we can neither exit via PIO nor MMIO, but have to use an instruction like DIAGNOSE. While we're at it, rename UCALL_PIO to UCALL_DEFAULT, since PIO only works on x86 anyway, and this way we can re-use the "default" type for the DIAGNOSE exit on s390x.
Now that ucall() is implemented, we can use it in the sync_reg_test on s390x, too.
Signed-off-by: Thomas Huth thuth@redhat.com
.../testing/selftests/kvm/include/kvm_util.h | 2 +- tools/testing/selftests/kvm/lib/ucall.c | 34 +++++++++++++++---- .../selftests/kvm/s390x/sync_regs_test.c | 6 ++-- 3 files changed, 32 insertions(+), 10 deletions(-)
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index e0e66b115ef2..c37aea2e33e5 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -167,7 +167,7 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); /* ucall implementation types */ typedef enum {
- UCALL_PIO,
- UCALL_DEFAULT,
I'd rather we keep explicit types defined; keep PIO and add DIAG. Then we can have
/* Set default ucall types */ #if defined(__x86_64__) ucall_type = UCALL_PIO; #elif defined(__aarch64__) ucall_type = UCALL_MMIO; ucall_requires_init = true; #elif defined(__s390x__) ucall_type = UCALL_DIAG; #endif
And add an assert in get_ucall()
assert(!ucall_requires_init || ucall_initialized);
I'm not sure whether I really like that. It's yet another additional #ifdef block, and yet another variable ...
What do you think about removing the enum completely and simply code it directly, without the ucall_type indirection, i.e.:
void ucall(uint64_t cmd, int nargs, ...) { struct ucall uc = { .cmd = cmd, }; va_list va; int i;
nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
va_start(va, nargs); for (i = 0; i < nargs; ++i) uc.args[i] = va_arg(va, uint64_t); va_end(va);
#if defined(__x86_64__)
/* Exit via PIO */ asm volatile("in %[port], %%al" : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax");
#elif defined(__aarch64__)
*ucall_exit_mmio_addr = (vm_vaddr_t)&uc;
#elif defined(__s390x__)
/* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */ asm volatile ("diag 0,%0,0x501" : : "a"(&uc) : "memory");
#endif }
I think that's way less confusing than having to understand the meaning of ucall_type etc. before...?
Sounds good to me.
Thanks, drew
On 31/07/2019 12.28, Andrew Jones wrote:
On Wed, Jul 31, 2019 at 11:43:16AM +0200, Thomas Huth wrote:
On 30/07/2019 12.48, Andrew Jones wrote:
On Tue, Jul 30, 2019 at 12:01:11PM +0200, Thomas Huth wrote:
On s390x, we can neither exit via PIO nor MMIO, but have to use an instruction like DIAGNOSE. While we're at it, rename UCALL_PIO to UCALL_DEFAULT, since PIO only works on x86 anyway, and this way we can re-use the "default" type for the DIAGNOSE exit on s390x.
Now that ucall() is implemented, we can use it in the sync_reg_test on s390x, too.
Signed-off-by: Thomas Huth thuth@redhat.com
.../testing/selftests/kvm/include/kvm_util.h | 2 +- tools/testing/selftests/kvm/lib/ucall.c | 34 +++++++++++++++---- .../selftests/kvm/s390x/sync_regs_test.c | 6 ++-- 3 files changed, 32 insertions(+), 10 deletions(-)
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index e0e66b115ef2..c37aea2e33e5 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -167,7 +167,7 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); /* ucall implementation types */ typedef enum {
- UCALL_PIO,
- UCALL_DEFAULT,
I'd rather we keep explicit types defined; keep PIO and add DIAG. Then we can have
/* Set default ucall types */ #if defined(__x86_64__) ucall_type = UCALL_PIO; #elif defined(__aarch64__) ucall_type = UCALL_MMIO; ucall_requires_init = true; #elif defined(__s390x__) ucall_type = UCALL_DIAG; #endif
And add an assert in get_ucall()
assert(!ucall_requires_init || ucall_initialized);
I'm not sure whether I really like that. It's yet another additional #ifdef block, and yet another variable ...
What do you think about removing the enum completely and simply code it directly, without the ucall_type indirection, i.e.:
void ucall(uint64_t cmd, int nargs, ...) { struct ucall uc = { .cmd = cmd, }; va_list va; int i;
nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
va_start(va, nargs); for (i = 0; i < nargs; ++i) uc.args[i] = va_arg(va, uint64_t); va_end(va);
#if defined(__x86_64__)
/* Exit via PIO */ asm volatile("in %[port], %%al" : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax");
#elif defined(__aarch64__)
*ucall_exit_mmio_addr = (vm_vaddr_t)&uc;
#elif defined(__s390x__)
/* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */ asm volatile ("diag 0,%0,0x501" : : "a"(&uc) : "memory");
#endif }
I think that's way less confusing than having to understand the meaning of ucall_type etc. before...?
Sounds good to me.
Or maybe even better: Let's move this file into lib/x86_64/ and lib/aarch64/ instead, since there is more different code between the architectures here than common code.
Thomas
On 31/07/19 13:16, Thomas Huth wrote:
Or maybe even better: Let's move this file into lib/x86_64/ and lib/aarch64/ instead, since there is more different code between the architectures here than common code.
All good solutions, just choose one. :))
Paolo
On Wed, Jul 31, 2019 at 02:57:38PM +0200, Paolo Bonzini wrote:
On 31/07/19 13:16, Thomas Huth wrote:
Or maybe even better: Let's move this file into lib/x86_64/ and lib/aarch64/ instead, since there is more different code between the architectures here than common code.
All good solutions, just choose one. :))
Agreed, and I like this last solution (move to arch-code) the best.
Thanks, drew
To run the dirty_log_test on s390x, we have to make sure that we access the dirty log bitmap with little endian byte ordering and we have to properly align the memslot of the guest. Also, on s390x all dirty bits of a segment are set at once when one of the pages of the segment is written to for the first time, so we have to make sure that we touch all pages during the first iteration to keep the test in sync here.
Signed-off-by: Thomas Huth thuth@redhat.com --- tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/dirty_log_test.c | 70 ++++++++++++++++++-- 2 files changed, 66 insertions(+), 5 deletions(-)
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index ba7849751989..ac7e63e00fee 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -33,6 +33,7 @@ TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
TEST_GEN_PROGS_s390x += s390x/sync_regs_test +TEST_GEN_PROGS_s390x += dirty_log_test TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index ceb52b952637..7a1223ad0ff3 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -26,9 +26,22 @@ /* The memory slot index to track dirty pages */ #define TEST_MEM_SLOT_INDEX 1
+#ifdef __s390x__ + +/* + * On s390x, the ELF program is sometimes linked at 0x80000000, so we can + * not use 0x40000000 here without overlapping into that region. Thus let's + * use 0xc0000000 as base address there instead. + */ +#define DEFAULT_GUEST_TEST_MEM 0xc0000000 + +#else + /* Default guest test memory offset, 1G */ #define DEFAULT_GUEST_TEST_MEM 0x40000000
+#endif + /* How many pages to dirty for each guest loop */ #define TEST_PAGES_PER_LOOP 1024
@@ -38,6 +51,27 @@ /* Interval for each host loop (ms) */ #define TEST_HOST_LOOP_INTERVAL 10UL
+/* Dirty bitmaps are always little endian, so we need to swap on big endian */ +#if defined(__s390x__) +# define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) +# define test_bit_le(nr, addr) \ + test_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +# define set_bit_le(nr, addr) \ + set_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +# define clear_bit_le(nr, addr) \ + clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +# define test_and_set_bit_le(nr, addr) \ + test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +# define test_and_clear_bit_le(nr, addr) \ + test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +#else +# define test_bit_le test_bit +# define set_bit_le set_bit +# define clear_bit_le clear_bit +# define test_and_set_bit_le test_and_set_bit +# define test_and_clear_bit_le test_and_clear_bit +#endif + /* * Guest/Host shared variables. Ensure addr_gva2hva() and/or * sync_global_to/from_guest() are used when accessing from @@ -69,11 +103,25 @@ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; */ static void guest_code(void) { + uint64_t addr; int i;
+#ifdef __s390x__ + /* + * On s390x, all pages of a 1M segment are initially marked as dirty + * when a page of the segment is written to for the very first time. + * To compensate this specialty in this test, we need to touch all + * pages during the first iteration. + */ + for (i = 0; i < guest_num_pages; i++) { + addr = guest_test_virt_mem + i * guest_page_size; + *(uint64_t *)addr = READ_ONCE(iteration); + } +#endif + while (true) { for (i = 0; i < TEST_PAGES_PER_LOOP; i++) { - uint64_t addr = guest_test_virt_mem; + addr = guest_test_virt_mem; addr += (READ_ONCE(random_array[i]) % guest_num_pages) * guest_page_size; addr &= ~(host_page_size - 1); @@ -158,15 +206,15 @@ static void vm_dirty_log_verify(unsigned long *bmap) value_ptr = host_test_mem + page * host_page_size;
/* If this is a special page that we were tracking... */ - if (test_and_clear_bit(page, host_bmap_track)) { + if (test_and_clear_bit_le(page, host_bmap_track)) { host_track_next_count++; - TEST_ASSERT(test_bit(page, bmap), + TEST_ASSERT(test_bit_le(page, bmap), "Page %"PRIu64" should have its dirty bit " "set in this iteration but it is missing", page); }
- if (test_bit(page, bmap)) { + if (test_bit_le(page, bmap)) { host_dirty_count++; /* * If the bit is set, the value written onto @@ -209,7 +257,7 @@ static void vm_dirty_log_verify(unsigned long *bmap) * should report its dirtyness in the * next run */ - set_bit(page, host_bmap_track); + set_bit_le(page, host_bmap_track); } } } @@ -293,6 +341,10 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, * case where the size is not aligned to 64 pages. */ guest_num_pages = (1ul << (30 - guest_page_shift)) + 16; +#ifdef __s390x__ + /* Round up to multiple of 1M (segment size) */ + guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL; +#endif host_page_size = getpagesize(); host_num_pages = (guest_num_pages * guest_page_size) / host_page_size + !!((guest_num_pages * guest_page_size) % host_page_size); @@ -304,6 +356,11 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, guest_test_phys_mem = phys_offset; }
+#ifdef __s390x__ + /* Align to 1M (segment size) */ + guest_test_phys_mem &= ~((1 << 20) - 1); +#endif + DEBUG("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
bmap = bitmap_alloc(host_num_pages); @@ -454,6 +511,9 @@ int main(int argc, char *argv[]) vm_guest_mode_params_init(VM_MODE_P48V48_64K, true, true); } #endif +#ifdef __s390x__ + vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true); +#endif
while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) { switch (opt) {
On Tue, Jul 30, 2019 at 12:01:12PM +0200, Thomas Huth wrote:
To run the dirty_log_test on s390x, we have to make sure that we access the dirty log bitmap with little endian byte ordering and we have to properly align the memslot of the guest. Also all dirty bits of a segment are set once on s390x when one of the pages of a segment are written to for the first time, so we have to make sure that we touch all pages during the first iteration to keep the test in sync here.
Signed-off-by: Thomas Huth thuth@redhat.com
tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/dirty_log_test.c | 70 ++++++++++++++++++-- 2 files changed, 66 insertions(+), 5 deletions(-)
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index ba7849751989..ac7e63e00fee 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -33,6 +33,7 @@ TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus TEST_GEN_PROGS_s390x += s390x/sync_regs_test +TEST_GEN_PROGS_s390x += dirty_log_test TEST_GEN_PROGS_s390x += kvm_create_max_vcpus TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index ceb52b952637..7a1223ad0ff3 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -26,9 +26,22 @@ /* The memory slot index to track dirty pages */ #define TEST_MEM_SLOT_INDEX 1 +#ifdef __s390x__
+/*
- On s390x, the ELF program is sometimes linked at 0x80000000, so we can
- not use 0x40000000 here without overlapping into that region. Thus let's
- use 0xc0000000 as base address there instead.
- */
+#define DEFAULT_GUEST_TEST_MEM 0xc0000000
I think both x86 and aarch64 should be ok with this offset. If testing proves they are, then we can just change it for all architectures.
+#else
/* Default guest test memory offset, 1G */ #define DEFAULT_GUEST_TEST_MEM 0x40000000 +#endif
/* How many pages to dirty for each guest loop */ #define TEST_PAGES_PER_LOOP 1024 @@ -38,6 +51,27 @@ /* Interval for each host loop (ms) */ #define TEST_HOST_LOOP_INTERVAL 10UL +/* Dirty bitmaps are always little endian, so we need to swap on big endian */ +#if defined(__s390x__) +# define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) +# define test_bit_le(nr, addr) \
- test_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define set_bit_le(nr, addr) \
- set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define clear_bit_le(nr, addr) \
- clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define test_and_set_bit_le(nr, addr) \
- test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define test_and_clear_bit_le(nr, addr) \
- test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+#else +# define test_bit_le test_bit +# define set_bit_le set_bit +# define clear_bit_le clear_bit +# define test_and_set_bit_le test_and_set_bit +# define test_and_clear_bit_le test_and_clear_bit +#endif
nit: does the formatting above look right after applying the patch?
/*
- Guest/Host shared variables. Ensure addr_gva2hva() and/or
- sync_global_to/from_guest() are used when accessing from
@@ -69,11 +103,25 @@ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; */ static void guest_code(void) {
- uint64_t addr; int i;
+#ifdef __s390x__
- /*
* On s390x, all pages of a 1M segment are initially marked as dirty
* when a page of the segment is written to for the very first time.
* To compensate this specialty in this test, we need to touch all
* pages during the first iteration.
*/
- for (i = 0; i < guest_num_pages; i++) {
addr = guest_test_virt_mem + i * guest_page_size;
*(uint64_t *)addr = READ_ONCE(iteration);
- }
+#endif
- while (true) { for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
uint64_t addr = guest_test_virt_mem;
addr = guest_test_virt_mem; addr += (READ_ONCE(random_array[i]) % guest_num_pages) * guest_page_size; addr &= ~(host_page_size - 1);
@@ -158,15 +206,15 @@ static void vm_dirty_log_verify(unsigned long *bmap) value_ptr = host_test_mem + page * host_page_size; /* If this is a special page that we were tracking... */
if (test_and_clear_bit(page, host_bmap_track)) {
if (test_and_clear_bit_le(page, host_bmap_track)) { host_track_next_count++;
TEST_ASSERT(test_bit(page, bmap),
}TEST_ASSERT(test_bit_le(page, bmap), "Page %"PRIu64" should have its dirty bit " "set in this iteration but it is missing", page);
if (test_bit(page, bmap)) {
if (test_bit_le(page, bmap)) { host_dirty_count++; /* * If the bit is set, the value written onto
@@ -209,7 +257,7 @@ static void vm_dirty_log_verify(unsigned long *bmap) * should report its dirtyness in the * next run */
set_bit(page, host_bmap_track);
} }set_bit_le(page, host_bmap_track); }
@@ -293,6 +341,10 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, * case where the size is not aligned to 64 pages. */ guest_num_pages = (1ul << (30 - guest_page_shift)) + 16; +#ifdef __s390x__
- /* Round up to multiple of 1M (segment size) */
- guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL;
We could maybe do this for all architectures as well.
+#endif host_page_size = getpagesize(); host_num_pages = (guest_num_pages * guest_page_size) / host_page_size + !!((guest_num_pages * guest_page_size) % host_page_size); @@ -304,6 +356,11 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, guest_test_phys_mem = phys_offset; } +#ifdef __s390x__
- /* Align to 1M (segment size) */
- guest_test_phys_mem &= ~((1 << 20) - 1);
and this
+#endif
- DEBUG("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
bmap = bitmap_alloc(host_num_pages); @@ -454,6 +511,9 @@ int main(int argc, char *argv[]) vm_guest_mode_params_init(VM_MODE_P48V48_64K, true, true); } #endif +#ifdef __s390x__
- vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true);
+#endif while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) { switch (opt) { -- 2.21.0
Thanks, drew
On 30/07/2019 12.57, Andrew Jones wrote:
On Tue, Jul 30, 2019 at 12:01:12PM +0200, Thomas Huth wrote:
To run the dirty_log_test on s390x, we have to make sure that we access the dirty log bitmap with little endian byte ordering and we have to properly align the memslot of the guest. Also all dirty bits of a segment are set once on s390x when one of the pages of a segment are written to for the first time, so we have to make sure that we touch all pages during the first iteration to keep the test in sync here.
Signed-off-by: Thomas Huth thuth@redhat.com
[...]
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index ceb52b952637..7a1223ad0ff3 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -26,9 +26,22 @@ /* The memory slot index to track dirty pages */ #define TEST_MEM_SLOT_INDEX 1 +#ifdef __s390x__
+/*
- On s390x, the ELF program is sometimes linked at 0x80000000, so we can
- not use 0x40000000 here without overlapping into that region. Thus let's
- use 0xc0000000 as base address there instead.
- */
+#define DEFAULT_GUEST_TEST_MEM 0xc0000000
I think both x86 and aarch64 should be ok with this offset. If testing proves it does, then we can just change it for all architecture.
Ok. It seems to work on x86 - could you please check aarch64, since I don't have such a system available right now?
+/* Dirty bitmaps are always little endian, so we need to swap on big endian */ +#if defined(__s390x__) +# define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) +# define test_bit_le(nr, addr) \
- test_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define set_bit_le(nr, addr) \
- set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define clear_bit_le(nr, addr) \
- clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define test_and_set_bit_le(nr, addr) \
- test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define test_and_clear_bit_le(nr, addr) \
- test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+#else +# define test_bit_le test_bit +# define set_bit_le set_bit +# define clear_bit_le clear_bit +# define test_and_set_bit_le test_and_set_bit +# define test_and_clear_bit_le test_and_clear_bit +#endif
nit: does the formatting above look right after applying the patch?
It looked ok to me, but I can add some more tabs to even make it nicer :)
@@ -293,6 +341,10 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, * case where the size is not aligned to 64 pages. */ guest_num_pages = (1ul << (30 - guest_page_shift)) + 16; +#ifdef __s390x__
- /* Round up to multiple of 1M (segment size) */
- guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL;
We could maybe do this for all architectures as well.
It's really only needed on s390x, so I think we should keep the #ifdef here.
Thomas
On Wed, Jul 31, 2019 at 10:19:57AM +0200, Thomas Huth wrote:
On 30/07/2019 12.57, Andrew Jones wrote:
On Tue, Jul 30, 2019 at 12:01:12PM +0200, Thomas Huth wrote:
To run the dirty_log_test on s390x, we have to make sure that we access the dirty log bitmap with little endian byte ordering and we have to properly align the memslot of the guest. Also, all dirty bits of a segment are set at once on s390x when one of the pages of the segment is written to for the first time, so we have to make sure that we touch all pages during the first iteration to keep the test in sync here.
Signed-off-by: Thomas Huth thuth@redhat.com
[...]
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index ceb52b952637..7a1223ad0ff3 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -26,9 +26,22 @@ /* The memory slot index to track dirty pages */ #define TEST_MEM_SLOT_INDEX 1 +#ifdef __s390x__
+/*
- On s390x, the ELF program is sometimes linked at 0x80000000, so we can
- not use 0x40000000 here without overlapping into that region. Thus let's
- use 0xc0000000 as base address there instead.
- */
+#define DEFAULT_GUEST_TEST_MEM 0xc0000000
I think both x86 and aarch64 should be ok with this offset. If testing proves they are, then we can just change it for all architectures.
Ok. It seems to work on x86 - could you please check aarch64, since I don't have such a system available right now?
Tested it. It works on aarch64 too.
+/* Dirty bitmaps are always little endian, so we need to swap on big endian */ +#if defined(__s390x__) +# define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) +# define test_bit_le(nr, addr) \
- test_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define set_bit_le(nr, addr) \
- set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define clear_bit_le(nr, addr) \
- clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define test_and_set_bit_le(nr, addr) \
- test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define test_and_clear_bit_le(nr, addr) \
- test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+#else +# define test_bit_le test_bit +# define set_bit_le set_bit +# define clear_bit_le clear_bit +# define test_and_set_bit_le test_and_set_bit +# define test_and_clear_bit_le test_and_clear_bit +#endif
nit: does the formatting above look right after applying the patch?
It looked ok to me, but I can add some more tabs to even make it nicer :)
@@ -293,6 +341,10 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, * case where the size is not aligned to 64 pages. */ guest_num_pages = (1ul << (30 - guest_page_shift)) + 16; +#ifdef __s390x__
- /* Round up to multiple of 1M (segment size) */
- guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL;
We could maybe do this for all architectures as well.
It's really only needed on s390x, so I think we should keep the #ifdef here.
OK
Thanks, drew
On 31/07/19 10:19, Thomas Huth wrote:
@@ -293,6 +341,10 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, * case where the size is not aligned to 64 pages. */ guest_num_pages = (1ul << (30 - guest_page_shift)) + 16; +#ifdef __s390x__
- /* Round up to multiple of 1M (segment size) */
- guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL;
We could maybe do this for all architectures as well.
It's really only needed on s390x, so I think we should keep the #ifdef here.
Yes, on non-s390 we should keep covering the case where the size is not a multiple of BITS_PER_LONG.
Paolo
On 30/07/19 12:01, Thomas Huth wrote:
+#ifdef __s390x__
- /*
* On s390x, all pages of a 1M segment are initially marked as dirty
* when a page of the segment is written to for the very first time.
* To compensate this specialty in this test, we need to touch all
* pages during the first iteration.
*/
- for (i = 0; i < guest_num_pages; i++) {
addr = guest_test_virt_mem + i * guest_page_size;
*(uint64_t *)addr = READ_ONCE(iteration);
- }
+#endif
Go ahead and make this unconditional.
Paolo
On 30.07.19 12:01, Thomas Huth wrote:
To run the dirty_log_test on s390x, we have to make sure that we access the dirty log bitmap with little endian byte ordering and we have to properly align the memslot of the guest. Also, all dirty bits of a segment are set at once on s390x when one of the pages of the segment is written to for the first time, so we have to make sure that we touch all pages during the first iteration to keep the test in sync here.
While this fixes the test (and the migration does work fine), it still means that s390x overindicates the dirty bit for sparsely populated 1M segments. It is just a performance issue, but maybe we should try to get this fixed. Not sure what to do here to remind us about this; add this as an expected failure?
Signed-off-by: Thomas Huth thuth@redhat.com
tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/dirty_log_test.c | 70 ++++++++++++++++++-- 2 files changed, 66 insertions(+), 5 deletions(-)
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index ba7849751989..ac7e63e00fee 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -33,6 +33,7 @@ TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus TEST_GEN_PROGS_s390x += s390x/sync_regs_test +TEST_GEN_PROGS_s390x += dirty_log_test TEST_GEN_PROGS_s390x += kvm_create_max_vcpus TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index ceb52b952637..7a1223ad0ff3 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -26,9 +26,22 @@ /* The memory slot index to track dirty pages */ #define TEST_MEM_SLOT_INDEX 1 +#ifdef __s390x__
+/*
- On s390x, the ELF program is sometimes linked at 0x80000000, so we can
- not use 0x40000000 here without overlapping into that region. Thus let's
- use 0xc0000000 as base address there instead.
- */
+#define DEFAULT_GUEST_TEST_MEM 0xc0000000
+#else
/* Default guest test memory offset, 1G */ #define DEFAULT_GUEST_TEST_MEM 0x40000000 +#endif
/* How many pages to dirty for each guest loop */ #define TEST_PAGES_PER_LOOP 1024 @@ -38,6 +51,27 @@ /* Interval for each host loop (ms) */ #define TEST_HOST_LOOP_INTERVAL 10UL +/* Dirty bitmaps are always little endian, so we need to swap on big endian */ +#if defined(__s390x__) +# define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) +# define test_bit_le(nr, addr) \
- test_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define set_bit_le(nr, addr) \
- set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define clear_bit_le(nr, addr) \
- clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define test_and_set_bit_le(nr, addr) \
- test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define test_and_clear_bit_le(nr, addr) \
- test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+#else +# define test_bit_le test_bit +# define set_bit_le set_bit +# define clear_bit_le clear_bit +# define test_and_set_bit_le test_and_set_bit +# define test_and_clear_bit_le test_and_clear_bit +#endif
/*
- Guest/Host shared variables. Ensure addr_gva2hva() and/or
- sync_global_to/from_guest() are used when accessing from
@@ -69,11 +103,25 @@ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; */ static void guest_code(void) {
- uint64_t addr; int i;
+#ifdef __s390x__
- /*
* On s390x, all pages of a 1M segment are initially marked as dirty
* when a page of the segment is written to for the very first time.
* To compensate this specialty in this test, we need to touch all
* pages during the first iteration.
*/
- for (i = 0; i < guest_num_pages; i++) {
addr = guest_test_virt_mem + i * guest_page_size;
*(uint64_t *)addr = READ_ONCE(iteration);
- }
+#endif
- while (true) { for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
uint64_t addr = guest_test_virt_mem;
addr = guest_test_virt_mem; addr += (READ_ONCE(random_array[i]) % guest_num_pages) * guest_page_size; addr &= ~(host_page_size - 1);
@@ -158,15 +206,15 @@ static void vm_dirty_log_verify(unsigned long *bmap) value_ptr = host_test_mem + page * host_page_size; /* If this is a special page that we were tracking... */
if (test_and_clear_bit(page, host_bmap_track)) {
if (test_and_clear_bit_le(page, host_bmap_track)) { host_track_next_count++;
TEST_ASSERT(test_bit(page, bmap),
}TEST_ASSERT(test_bit_le(page, bmap), "Page %"PRIu64" should have its dirty bit " "set in this iteration but it is missing", page);
if (test_bit(page, bmap)) {
if (test_bit_le(page, bmap)) { host_dirty_count++; /* * If the bit is set, the value written onto
@@ -209,7 +257,7 @@ static void vm_dirty_log_verify(unsigned long *bmap) * should report its dirtyness in the * next run */
set_bit(page, host_bmap_track);
} }set_bit_le(page, host_bmap_track); }
@@ -293,6 +341,10 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, * case where the size is not aligned to 64 pages. */ guest_num_pages = (1ul << (30 - guest_page_shift)) + 16; +#ifdef __s390x__
- /* Round up to multiple of 1M (segment size) */
- guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL;
+#endif host_page_size = getpagesize(); host_num_pages = (guest_num_pages * guest_page_size) / host_page_size + !!((guest_num_pages * guest_page_size) % host_page_size); @@ -304,6 +356,11 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, guest_test_phys_mem = phys_offset; } +#ifdef __s390x__
- /* Align to 1M (segment size) */
- guest_test_phys_mem &= ~((1 << 20) - 1);
+#endif
- DEBUG("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
bmap = bitmap_alloc(host_num_pages); @@ -454,6 +511,9 @@ int main(int argc, char *argv[]) vm_guest_mode_params_init(VM_MODE_P48V48_64K, true, true); } #endif +#ifdef __s390x__
- vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true);
+#endif while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) { switch (opt) {
On 30/07/2019 16.57, Christian Borntraeger wrote:
On 30.07.19 12:01, Thomas Huth wrote:
To run the dirty_log_test on s390x, we have to make sure that we access the dirty log bitmap with little endian byte ordering and we have to properly align the memslot of the guest. Also, all dirty bits of a segment are set at once on s390x when one of the pages of the segment is written to for the first time, so we have to make sure that we touch all pages during the first iteration to keep the test in sync here.
While this fixes the test (and the migration does work fine), it still means that s390x overindicates the dirty bit for sparsely populated 1M segments. It is just a performance issue, but maybe we should try to get this fixed.
I hope you don't expect me to fix this - the gmap code is really not my turf...
Not sure what to do here to remind us about this; add this as an expected failure?
There is no such thing as an expected failure in KVM selftests - that's only available in kvm-unit-tests.
So the only option that I currently see is to add a printf("TODO: ...") on s390x here... would that work for you?
Thomas
On 30.07.19 19:11, Thomas Huth wrote:
On 30/07/2019 16.57, Christian Borntraeger wrote:
On 30.07.19 12:01, Thomas Huth wrote:
To run the dirty_log_test on s390x, we have to make sure that we access the dirty log bitmap with little endian byte ordering and we have to properly align the memslot of the guest. Also, all dirty bits of a segment are set at once on s390x when one of the pages of the segment is written to for the first time, so we have to make sure that we touch all pages during the first iteration to keep the test in sync here.
While this fixes the test (and the migration does work fine), it still means that s390x overindicates the dirty bit for sparsely populated 1M segments. It is just a performance issue, but maybe we should try to get this fixed.
I hope you don't expect me to fix this - the gmap code is really not my turf...
No, this is clearly on our turf.
Not sure what to do here to remind us about this; add this as an expected failure?
There is no such thing as an expected failure in KVM selftests - that's only available in kvm-unit-tests.
So the only option that I currently see is to add a printf("TODO: ...") on s390x here... would that work for you?
Maybe just keep this as is - we should just not forget about it.
On 30.07.19 20:04, Christian Borntraeger wrote:
On 30.07.19 19:11, Thomas Huth wrote:
On 30/07/2019 16.57, Christian Borntraeger wrote:
On 30.07.19 12:01, Thomas Huth wrote:
To run the dirty_log_test on s390x, we have to make sure that we access the dirty log bitmap with little endian byte ordering and we have to properly align the memslot of the guest. Also, all dirty bits of a segment are set at once on s390x when one of the pages of the segment is written to for the first time, so we have to make sure that we touch all pages during the first iteration to keep the test in sync here.
While this fixes the test (and the migration does work fine), it still means that s390x overindicates the dirty bit for sparsely populated 1M segments. It is just a performance issue, but maybe we should try to get this fixed.
I hope you don't expect me to fix this - the gmap code is really not my turf...
No, this is clearly on our turf.
FWIW, we share the page tables with the userspace process. We mark a page as dirty (PGSTE_UC_BIT) when (1) we modify the storage key, or (2) we map a PTE as RW (pgste_set_pte()).
I assume all PTEs of the segment are mapped RW (for example, if user space wrote to such a PTE), that is why we have the PGSTE_UC_BIT bit set.
As PGSTE_UC_BIT also tracks what userspace did, not only KVM via the GMAP, this might indeed be correct.
On 30/07/19 16:57, Christian Borntraeger wrote:
While this fixes the test (and the migration does work fine), it still means that s390x overindicates the dirty bit for sparsely populated 1M segments. It is just a performance issue, but maybe we should try to get this fixed. Not sure what to do here to remind us about this; add this as an expected failure?
if it's only on the first access after enabling dirty logging, that shouldn't be that bad?
Paolo
On 30.07.19 21:06, Paolo Bonzini wrote:
On 30/07/19 16:57, Christian Borntraeger wrote:
While this fixes the test (and the migration does work fine), it still means that s390x overindicates the dirty bit for sparsely populated 1M segments. It is just a performance issue, but maybe we should try to get this fixed. Not sure what to do here to remind us about this; add this as an expected failure?
if it's only on the first access after enabling dirty logging, that shouldn't be that bad?
No, it's not bad, but certainly something to improve if time allows.
On 30/07/19 12:01, Thomas Huth wrote:
Implement the ucall() interface on s390x to be able to use the dirty_log_test KVM selftest on s390x, too.
Thomas Huth (2): KVM: selftests: Implement ucall() for s390x KVM: selftests: Enable dirty_log_test on s390x
tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/dirty_log_test.c | 70 +++++++++++++++++-- .../testing/selftests/kvm/include/kvm_util.h | 2 +- tools/testing/selftests/kvm/lib/ucall.c | 34 +++++++-- .../selftests/kvm/s390x/sync_regs_test.c | 6 +- 5 files changed, 98 insertions(+), 15 deletions(-)
Acked-by: Paolo Bonzini pbonzini@redhat.com
(apart from the small review comment on patch 2).
Paolo
linux-kselftest-mirror@lists.linaro.org