On 9/3/24 12:47 AM, Christophe Leroy wrote:
Extend getrandom() vDSO implementation to VDSO64
Tested on QEMU on both ppc64_defconfig and ppc64le_defconfig.
The results are not precise as it is QEMU on an x86 laptop, but no need to be precise to see the benefit.
~ # ./vdso_test_getrandom bench-single vdso: 25000000 times in 4.977777162 seconds libc: 25000000 times in 75.516749981 seconds syscall: 25000000 times in 86.842242014 seconds
~ # ./vdso_test_getrandom bench-single vdso: 25000000 times in 6.473814156 seconds libc: 25000000 times in 73.875109463 seconds syscall: 25000000 times in 71.805066229 seconds
Tried the patchset on top of
https://kernel.googlesource.com/pub/scm/linux/kernel/git/crng/random.git (commit 963233ff013377bc2aa0d641b9efbb7fd4c2b72c (origin/master, origin/HEAD, master))
Results from a Power9 (PowerNV) # ./vdso_test_getrandom bench-single vdso: 25000000 times in 0.787943615 seconds libc: 25000000 times in 14.101887252 seconds syscall: 25000000 times in 14.047475082 seconds
Impressive, thanks for enabling it.
Tested-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
v5:
- VDSO32 for both PPC32 and PPC64 is in the previous patch. This patch has the logic for VDSO64.
v4:
- Use __BIG_ENDIAN__, which is defined by GCC, instead of CONFIG_CPU_BIG_ENDIAN, which is unknown to selftests
- Implement a cleaner/smaller output copy for little endian instead of keeping compat macro.
v3: New (split out of previous patch)
arch/powerpc/Kconfig | 2 +- arch/powerpc/kernel/vdso/Makefile | 8 ++- arch/powerpc/kernel/vdso/getrandom.S | 8 +++ arch/powerpc/kernel/vdso/vdso64.lds.S | 1 + arch/powerpc/kernel/vdso/vgetrandom-chacha.S | 53 ++++++++++++++++++++ 5 files changed, 69 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e500a59ddecc..b45452ac4a73 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -311,7 +311,7 @@ config PPC select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select TRACE_IRQFLAGS_SUPPORT
-	select VDSO_GETRANDOM if VDSO32
+	select VDSO_GETRANDOM
 	#
 	# Please keep this list sorted alphabetically.
 	#
diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index 7a4a935406d8..56fb1633529a 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -9,6 +9,7 @@ obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o not obj-vdso64 = sigtramp64-64.o gettimeofday-64.o datapage-64.o cacheflush-64.o note-64.o getcpu-64.o obj-vdso32 += getrandom-32.o vgetrandom-chacha-32.o +obj-vdso64 += getrandom-64.o vgetrandom-chacha-64.o ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday-32.o += -include $(c-gettimeofday-y) @@ -21,6 +22,7 @@ endif ifneq ($(c-getrandom-y),) CFLAGS_vgetrandom-32.o += -include $(c-getrandom-y)
+  CFLAGS_vgetrandom-64.o += -include $(c-getrandom-y) $(call cc-option, -ffixed-r30)
 endif
# Build rules @@ -34,7 +36,7 @@ endif targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o vgetrandom-32.o targets += crtsavres-32.o obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) -targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o +targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o vgetrandom-64.o obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) ccflags-y := -fno-common -fno-builtin -DBUILD_VDSO @@ -71,7 +73,7 @@ CPPFLAGS_vdso64.lds += -P -C # link rule for the .so file, .lds has to be first $(obj)/vdso32.so.dbg: $(obj)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o $(obj)/vgetrandom-32.o $(obj)/crtsavres-32.o FORCE $(call if_changed,vdso32ld_and_check) -$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o FORCE +$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o $(obj)/vgetrandom-64.o FORCE $(call if_changed,vdso64ld_and_check) # assembly rules for the .S files @@ -87,6 +89,8 @@ $(obj-vdso64): %-64.o: %.S FORCE $(call if_changed_dep,vdso64as) $(obj)/vgettimeofday-64.o: %-64.o: %.c FORCE $(call if_changed_dep,cc_o_c) +$(obj)/vgetrandom-64.o: %-64.o: %.c FORCE
+	$(call if_changed_dep,cc_o_c)
# Generate VDSO offsets using helper script gen-vdso32sym := $(src)/gen_vdso32_offsets.sh diff --git a/arch/powerpc/kernel/vdso/getrandom.S b/arch/powerpc/kernel/vdso/getrandom.S index 21773ef3fc1d..a957cd2b2b03 100644 --- a/arch/powerpc/kernel/vdso/getrandom.S +++ b/arch/powerpc/kernel/vdso/getrandom.S @@ -27,10 +27,18 @@ .cfi_adjust_cfa_offset PPC_MIN_STKFRM PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) .cfi_rel_offset lr, PPC_MIN_STKFRM + PPC_LR_STKOFF +#ifdef __powerpc64__
+	PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1)
+	.cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT
+#endif get_datapage r8 addi r8, r8, VDSO_RNG_DATA_OFFSET bl CFUNC(DOTSYM(\funct)) PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__
+	PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1)
+	.cfi_restore r2
+#endif cmpwi r3, 0 mtlr r0 addi r1, r1, 2 * PPC_MIN_STKFRM diff --git a/arch/powerpc/kernel/vdso/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S index 400819258c06..9481e4b892ed 100644 --- a/arch/powerpc/kernel/vdso/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso/vdso64.lds.S @@ -123,6 +123,7 @@ VERSION __kernel_sigtramp_rt64; __kernel_getcpu; __kernel_time;
+		__kernel_getrandom;
local: *; }; diff --git a/arch/powerpc/kernel/vdso/vgetrandom-chacha.S b/arch/powerpc/kernel/vdso/vgetrandom-chacha.S index ac85788205cb..7f9061a9e8b4 100644 --- a/arch/powerpc/kernel/vdso/vgetrandom-chacha.S +++ b/arch/powerpc/kernel/vdso/vgetrandom-chacha.S @@ -124,6 +124,26 @@ */ SYM_FUNC_START(__arch_chacha20_blocks_nostack) #ifdef __powerpc64__
+	std counter, -216(r1)
+	std r14, -144(r1)
+	std r15, -136(r1)
+	std r16, -128(r1)
+	std r17, -120(r1)
+	std r18, -112(r1)
+	std r19, -104(r1)
+	std r20, -96(r1)
+	std r21, -88(r1)
+	std r22, -80(r1)
+	std r23, -72(r1)
+	std r24, -64(r1)
+	std r25, -56(r1)
+	std r26, -48(r1)
+	std r27, -40(r1)
+	std r28, -32(r1)
+	std r29, -24(r1)
+	std r30, -16(r1)
+	std r31, -8(r1)
 #else
 	stwu r1, -96(r1)
 	stw counter, 20(r1)
@@ -149,9 +169,13 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) stw r30, 88(r1) stw r31, 92(r1) #endif +#endif /* __powerpc64__ */ lwz counter0, 0(counter) lwz counter1, 4(counter) +#ifdef __powerpc64__
+	rldimi counter0, counter1, 32, 0
+#endif mr idx_r0, nblocks subi dst_bytes, dst_bytes, 4 @@ -267,12 +291,21 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) subic. idx_r0, idx_r0, 1 /* subi. can't use r0 as source */ +#ifdef __powerpc64__
+	addi counter0, counter0, 1
+	srdi counter1, counter0, 32
+#else addic counter0, counter0, 1 addze counter1, counter1 +#endif bne .Lblock +#ifdef __powerpc64__
+	ld counter, -216(r1)
+#else lwz counter, 20(r1) +#endif stw counter0, 0(counter) stw counter1, 4(counter) @@ -284,6 +317,26 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) li r11, 0 li r12, 0 +#ifdef __powerpc64__
+	ld r14, -144(r1)
+	ld r15, -136(r1)
+	ld r16, -128(r1)
+	ld r17, -120(r1)
+	ld r18, -112(r1)
+	ld r19, -104(r1)
+	ld r20, -96(r1)
+	ld r21, -88(r1)
+	ld r22, -80(r1)
+	ld r23, -72(r1)
+	ld r24, -64(r1)
+	ld r25, -56(r1)
+	ld r26, -48(r1)
+	ld r27, -40(r1)
+	ld r28, -32(r1)
+	ld r29, -24(r1)
+	ld r30, -16(r1)
+	ld r31, -8(r1)
+#else #ifdef __BIG_ENDIAN__ lmw r14, 24(r1) #else