[PATCH 4.14 046/246] powerpc/lib: Adjust .balign inside string functions for PPC32

1 Aug 2018

4.14-stable review patch.  If anyone has any objections, please let me know.
------------------
From: Christophe Leroy christophe.leroy@c-s.fr
[ Upstream commit 1128bb7813a896bd608fb622eee3c26aaf33b473 ]
Commit 87a156fb18fe1 ("Align hot loops of some string functions")
degraded the performance of string functions by adding useless
nops.
A simple benchmark on an 8xx that calls memchr() 100000 times, with a
match on the first byte, runs in 41668 TB ticks before this patch
and in 35986 TB ticks after this patch. This gives an
improvement of approximately 10%.
Another benchmark doing the same with a memchr() matching the 128th
byte runs in 1011365 TB ticks before this patch and 1005682 TB ticks
after this patch, so regardless of the number of loops, removing
those useless nops improves the test by 5683 TB ticks.
Fixes: 87a156fb18fe1 ("Align hot loops of some string functions")
Signed-off-by: Christophe Leroy christophe.leroy@c-s.fr
Signed-off-by: Michael Ellerman mpe@ellerman.id.au
Signed-off-by: Sasha Levin alexander.levin@microsoft.com
Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
---
 arch/powerpc/include/asm/cache.h |    3 +++
 arch/powerpc/lib/string.S        |    7 ++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -9,11 +9,14 @@
 #if defined(CONFIG_PPC_8xx) || defined(CONFIG_403GCX)
 #define L1_CACHE_SHIFT		4
 #define MAX_COPY_PREFETCH	1
+#define IFETCH_ALIGN_SHIFT	2
 #elif defined(CONFIG_PPC_E500MC)
 #define L1_CACHE_SHIFT		6
 #define MAX_COPY_PREFETCH	4
+#define IFETCH_ALIGN_SHIFT	3
 #elif defined(CONFIG_PPC32)
 #define MAX_COPY_PREFETCH	4
+#define IFETCH_ALIGN_SHIFT	3	/* 603 fetches 2 insn at a time */
 #if defined(CONFIG_PPC_47x)
 #define L1_CACHE_SHIFT		7
 #else
--- a/arch/powerpc/lib/string.S
+++ b/arch/powerpc/lib/string.S
@@ -12,6 +12,7 @@
 #include <asm/errno.h>
 #include <asm/ppc_asm.h>
 #include <asm/export.h>
+#include <asm/cache.h>
.text
    
@@ -23,7 +24,7 @@ _GLOBAL(strncpy)
    mtctr	r5
    addi	r6,r3,-1
    addi	r4,r4,-1
-	.balign 16
+	.balign IFETCH_ALIGN_BYTES
 1:	lbzu	r0,1(r4)
    cmpwi	0,r0,0
    stbu	r0,1(r6)
@@ -43,7 +44,7 @@ _GLOBAL(strncmp)
    mtctr	r5
    addi	r5,r3,-1
    addi	r4,r4,-1
-	.balign 16
+	.balign IFETCH_ALIGN_BYTES
 1:	lbzu	r3,1(r5)
    cmpwi	1,r3,0
    lbzu	r0,1(r4)
@@ -77,7 +78,7 @@ _GLOBAL(memchr)
    beq-	2f
    mtctr	r5
    addi	r3,r3,-1
-	.balign 16
+	.balign IFETCH_ALIGN_BYTES
 1:	lbzu	r0,1(r3)
    cmpw	0,r0,r4
    bdnzf	2,1b

    

2026

2025

2024

2023

2022

2021

2020

2019

2018

2017

[PATCH 4.14 046/246] powerpc/lib: Adjust .balign inside string functions for PPC32