User space can use the MEM_OP ioctl to make storage-key-checked reads and writes to the guest; however, it has no way of performing atomic, key-checked accesses to the guest. Extend the MEM_OP ioctl in order to allow for this, by adding a cmpxchg mode. For now, support this mode for absolute accesses only.
This mode can be used, for example, to set the device-state-change indicator and the adapter-local-summary indicator atomically.
Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com> ---
The return value of MEM_OP is: 0 on success, < 0 on generic error (e.g. -EFAULT or -ENOMEM),
> 0 if an exception occurred while walking the page tables.
A cmpxchg failing because the old value doesn't match is neither an error nor an exception, so the question is how best to signal that condition. A dedicated signal is not strictly necessary, since user space can compare the value of old after the MEM_OP with the value it set: if they're different, the cmpxchg failed. However, it might be a better user interface if there were an easier way to see whether the cmpxchg failed. This patch sets the cmpxchg flag bit to 0 on a successful cmpxchg. This way you can compare against a constant instead of against the previous value of old. This has the disadvantage of being a bit weird; other suggestions are welcome.
include/uapi/linux/kvm.h | 5 ++++ arch/s390/kvm/gaccess.h | 4 +++ arch/s390/kvm/gaccess.c | 56 ++++++++++++++++++++++++++++++++++++++++ arch/s390/kvm/kvm-s390.c | 50 ++++++++++++++++++++++++++++++----- 4 files changed, 109 insertions(+), 6 deletions(-)
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index eed0315a77a6..b856705f3f6b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -580,7 +580,9 @@ struct kvm_translation { struct kvm_s390_mem_op { /* in */ __u64 gaddr; /* the guest address */ + /* in & out */ __u64 flags; /* flags */ + /* in */ __u32 size; /* amount of bytes */ __u32 op; /* type of operation */ __u64 buf; /* buffer in userspace */ @@ -588,6 +590,8 @@ struct kvm_s390_mem_op { struct { __u8 ar; /* the access register number */ __u8 key; /* access key, ignored if flag unset */ + /* in & out */ + __u64 old[2]; /* ignored if flag unset */ }; __u32 sida_offset; /* offset into the sida */ __u8 reserved[32]; /* ignored */ @@ -604,6 +608,7 @@ struct kvm_s390_mem_op { #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) #define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) +#define KVM_S390_MEMOP_F_CMPXCHG (1ULL << 3)
/* for KVM_INTERRUPT */ struct kvm_interrupt { diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 9408d6cc8e2c..a1cb66ae0995 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -206,6 +206,10 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, unsigned long len, enum gacc_mode mode);
+int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, + unsigned __int128 *old, + unsigned __int128 new, u8 access_key); + /** * write_guest_with_key - copy data from kernel space to guest space * @vcpu: virtual cpu diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 0243b6e38d36..c0e490ecc372 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1161,6 +1161,62 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, return rc; }
+/** + * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address. + * @kvm: Virtual machine instance. + * @gpa: Absolute guest address of the location to be changed. + * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a + * non power of two will result in failure. + * @old_p: Pointer to old value. If the location at @gpa contains this value, the + * exchange will succeed. After calling cmpxchg_guest_abs_with_key() *@old + * contains the value at @gpa before the attempt to exchange the value. + * @new: The value to place at @gpa. + * @access_key: The access key to use for the guest access. + * + * Atomically exchange the value at @gpa by @new, if it contains *@old. + * Honors storage keys. + * + * Return: * 0: successful exchange + * * 1: exchange unsuccessful + * * a program interruption code indicating the reason cmpxchg could + * not be attempted + * * -EINVAL: address misaligned or len not power of two + */ +int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, + unsigned __int128 *old_p, unsigned __int128 new, + u8 access_key) +{ + gfn_t gfn = gpa >> PAGE_SHIFT; + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + bool writable; + hva_t hva; + int ret; + + if (!IS_ALIGNED(gpa, len)) + return -EINVAL; + + hva = gfn_to_hva_memslot_prot(slot, gfn, &writable); + if (kvm_is_error_hva(hva)) + return PGM_ADDRESSING; + /* + * Check if it's a ro memslot, even tho that can't occur (they're unsupported). + * Don't try to actually handle that case. + */ + if (!writable) + return -EOPNOTSUPP; + + hva += offset_in_page(gpa); + ret = cmpxchg_user_key_size(len, (void __user *)hva, old_p, new, access_key); + mark_page_dirty_in_slot(kvm, slot, gfn); + /* + * Assume that the fault is caused by key protection, the alternative + * is that the user page is write protected. 
+ */ + if (ret == -EFAULT) + ret = PGM_PROTECTION; + return ret; +} + /** * guest_translate_address_with_key - translate guest logical into guest absolute address * @vcpu: virtual cpu diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b7ef0b71014d..d594d1318d2a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -576,7 +576,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_VCPU_RESETS: case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_S390_DIAG318: - case KVM_CAP_S390_MEM_OP_EXTENSION: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: @@ -590,6 +589,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_MEM_OP: r = MEM_OP_MAX_SIZE; break; + case KVM_CAP_S390_MEM_OP_EXTENSION: + r = 0x3; + break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: case KVM_CAP_MAX_VCPU_ID: @@ -2711,15 +2713,22 @@ static bool access_key_invalid(u8 access_key) return access_key > 0xf; }
-static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) +static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop, bool *modified) { void __user *uaddr = (void __user *)mop->buf; + unsigned __int128 old; + union { + unsigned __int128 quad; + char raw[sizeof(unsigned __int128)]; + } new = { .quad = 0 }; u64 supported_flags; void *tmpbuf = NULL; int r, srcu_idx;
+ *modified = false; supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION - | KVM_S390_MEMOP_F_CHECK_ONLY; + | KVM_S390_MEMOP_F_CHECK_ONLY + | KVM_S390_MEMOP_F_CMPXCHG; if (mop->flags & ~supported_flags || !mop->size) return -EINVAL; if (mop->size > MEM_OP_MAX_SIZE) @@ -2741,6 +2750,13 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) } else { mop->key = 0; } + if (mop->flags & KVM_S390_MEMOP_F_CMPXCHG) { + if (mop->size > sizeof(new)) + return -EINVAL; + if (copy_from_user(&new.raw[sizeof(new) - mop->size], uaddr, mop->size)) + return -EFAULT; + memcpy(&old, mop->old, sizeof(old)); + } if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { tmpbuf = vmalloc(mop->size); if (!tmpbuf) @@ -2771,6 +2787,16 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) case KVM_S390_MEMOP_ABSOLUTE_WRITE: { if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key); + } else if (mop->flags & KVM_S390_MEMOP_F_CMPXCHG) { + r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, + &old, new.quad, mop->key); + if (!r) { + mop->flags &= ~KVM_S390_MEMOP_F_CMPXCHG; + } else if (r == 1) { + memcpy(mop->old, &old, sizeof(old)); + r = 0; + } + *modified = true; } else { if (copy_from_user(tmpbuf, uaddr, mop->size)) { r = -EFAULT; @@ -2918,11 +2944,23 @@ long kvm_arch_vm_ioctl(struct file *filp, } case KVM_S390_MEM_OP: { struct kvm_s390_mem_op mem_op; + bool modified;
- if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) - r = kvm_s390_vm_mem_op(kvm, &mem_op); - else + r = copy_from_user(&mem_op, argp, sizeof(mem_op)); + if (r) { r = -EFAULT; + break; + } + r = kvm_s390_vm_mem_op(kvm, &mem_op, &modified); + if (r) + break; + if (modified) { + r = copy_to_user(argp, &mem_op, sizeof(mem_op)); + if (r) { + r = -EFAULT; + break; + } + } break; } case KVM_S390_ZPCI_OP: {