This is an automated email from the git hooks/post-receive script.
unknown user pushed a commit to branch hjl/iamcu/improve in repository gcc.
commit 7b5076ffd5a5d9fbd7730bd3471655f4b31d8105 Author: H.J. Lu hjl.tools@gmail.com Date: Tue Jul 21 14:32:09 2015 -0700
Add __builtin_stack_top
When __builtin_frame_address is used to retrieve the address of the function stack frame, the frame pointer is always kept, which wastes one register and two instructions. For x86-32, one fewer register means a significant negative impact on performance. This patch adds a new builtin function, __builtin_stack_top. It returns the stack address as it was when the function was called.
This patch only enables __builtin_stack_top for the x86 backend. Using __builtin_stack_top with other backends will lead to the following diagnostic:
sorry, unimplemented: ‘__builtin_stack_top’ not supported on this target
TARGET_STACK_TOP_RTX must be defined to enable __builtin_stack_top. default_stack_top_rtx may be extended to support more backends, including those with INITIAL_FRAME_ADDRESS_RTX.
gcc/
PR target/66960 * builtin-types.def (BT_FN_PTR_VOID): New function type. * builtins.c (expand_builtin): Handle BUILT_IN_STACK_TOP. (is_simple_builtin): Likewise. * ipa-pure-const.c (special_builtin_state): Likewise. * builtins.def: Add BUILT_IN_STACK_TOP. * function.h (function): Add stack_top_taken. * target.def (stack_top_rtx): New target hook. * targhooks.c (default_stack_top_rtx): New. * targhooks.h (default_stack_top_rtx): Likewise. * config/i386/i386.c (ix86_expand_prologue): Sorry if DRAP is used and the stack address has been taken. (TARGET_STACK_TOP_RTX): New. * doc/extend.texi: Document __builtin_stack_top. * doc/tm.texi.in (TARGET_STACK_TOP_RTX): New. * doc/tm.texi: Regenerated.
gcc/testsuite/
PR target/66960 * gcc.target/i386/pr66960-1.c: New test. * gcc.target/i386/pr66960-2.c: Likewise. * gcc.target/i386/pr66960-3.c: Likewise. * gcc.target/i386/pr66960-4.c: Likewise. * gcc.target/i386/pr66960-5.c: Likewise. --- gcc/builtin-types.def | 1 + gcc/builtins.c | 11 +++++++++++ gcc/builtins.def | 1 + gcc/config/i386/i386.c | 8 ++++++++ gcc/doc/extend.texi | 7 +++++++ gcc/doc/tm.texi | 5 +++++ gcc/doc/tm.texi.in | 2 ++ gcc/function.h | 3 +++ gcc/ipa-pure-const.c | 1 + gcc/target.def | 7 +++++++ gcc/targhooks.c | 9 +++++++++ gcc/targhooks.h | 3 +++ gcc/testsuite/gcc.target/i386/pr66960-1.c | 33 +++++++++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr66960-2.c | 33 +++++++++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr66960-3.c | 17 ++++++++++++++++ gcc/testsuite/gcc.target/i386/pr66960-4.c | 21 ++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr66960-5.c | 21 ++++++++++++++++++++ 17 files changed, 183 insertions(+)
diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def index 0e34531..2b6b5ab 100644 --- a/gcc/builtin-types.def +++ b/gcc/builtin-types.def @@ -177,6 +177,7 @@ DEF_FUNCTION_TYPE_1 (BT_FN_COMPLEX_LONGDOUBLE_LONGDOUBLE, BT_COMPLEX_LONGDOUBLE, BT_LONGDOUBLE) DEF_FUNCTION_TYPE_1 (BT_FN_PTR_UINT, BT_PTR, BT_UINT) DEF_FUNCTION_TYPE_1 (BT_FN_PTR_SIZE, BT_PTR, BT_SIZE) +DEF_FUNCTION_TYPE_1 (BT_FN_PTR_VOID, BT_PTR, BT_VOID) DEF_FUNCTION_TYPE_1 (BT_FN_INT_INT, BT_INT, BT_INT) DEF_FUNCTION_TYPE_1 (BT_FN_INT_UINT, BT_INT, BT_UINT) DEF_FUNCTION_TYPE_1 (BT_FN_INT_LONG, BT_INT, BT_LONG) diff --git a/gcc/builtins.c b/gcc/builtins.c index 1750e25..94514b4 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -6218,6 +6218,16 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine [...] case BUILT_IN_CONSTANT_P: return const0_rtx;
+ case BUILT_IN_STACK_TOP: + if (targetm.calls.stack_top_rtx) + { + cfun->stack_top_taken = true; + return targetm.calls.stack_top_rtx (); + } + else + sorry ("%<__builtin_stack_top%> not supported on this target"); + break; + case BUILT_IN_FRAME_ADDRESS: case BUILT_IN_RETURN_ADDRESS: return expand_builtin_frame_address (fndecl, exp); @@ -12407,6 +12417,7 @@ is_simple_builtin (tree decl) case BUILT_IN_RETURN: case BUILT_IN_AGGREGATE_INCOMING_ADDRESS: case BUILT_IN_FRAME_ADDRESS: + case BUILT_IN_STACK_TOP: case BUILT_IN_VA_END: case BUILT_IN_STACK_SAVE: case BUILT_IN_STACK_RESTORE: diff --git a/gcc/builtins.def b/gcc/builtins.def index 80e4a9c..62f0523 100644 --- a/gcc/builtins.def +++ b/gcc/builtins.def @@ -778,6 +778,7 @@ DEF_EXT_LIB_BUILTIN (BUILT_IN_FFSL, "ffsl", BT_FN_INT_LONG, [...] DEF_EXT_LIB_BUILTIN (BUILT_IN_FFSLL, "ffsll", BT_FN_INT_LONGLONG, ATTR_CONST_NO [...] DEF_EXT_LIB_BUILTIN (BUILT_IN_FORK, "fork", BT_FN_PID, ATTR_NOTHROW_LIST) DEF_GCC_BUILTIN (BUILT_IN_FRAME_ADDRESS, "frame_address", BT_FN_PTR_UINT, A [...] +DEF_GCC_BUILTIN (BUILT_IN_STACK_TOP, "stack_top", BT_FN_PTR_VOID, ATTR_NULL) /* [trans-mem]: Adjust BUILT_IN_TM_FREE if BUILT_IN_FREE is changed. */ DEF_LIB_BUILTIN (BUILT_IN_FREE, "free", BT_FN_VOID_PTR, ATTR_NOTHROW_LEAF_LIST) DEF_GCC_BUILTIN (BUILT_IN_FROB_RETURN_ADDR, "frob_return_addr", BT_FN_PTR_P [...] diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 4a2c57b..964d693 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -11575,6 +11575,12 @@ ix86_expand_prologue (void) { int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
+ /* Can't use DRAP if the stack address has been taken. */ + if (cfun->stack_top_taken) + sorry ("%<__builtin_stack_top%> not supported with stack" + " realignment. This may be worked around by adding" + " -maccumulate-outgoing-args."); + /* Only need to push parameter pointer reg if it is caller saved. */ if (!call_used_regs[REGNO (crtl->drap_reg)]) { @@ -52576,6 +52582,8 @@ ix86_operands_ok_for_move_multiple (rtx *operands, bool load, #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary #undef TARGET_GET_DRAP_RTX #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx +#undef TARGET_STACK_TOP_RTX +#define TARGET_STACK_TOP_RTX default_stack_top_rtx #undef TARGET_STRICT_ARGUMENT_NAMING #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true #undef TARGET_STATIC_CHAIN diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index b18d8fb..e08b9f9 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -8695,6 +8695,13 @@ This function should only be used with a nonzero argument fo [...] purposes. @end deftypefn
+@deftypefn {Built-in Function} {void *} __builtin_stack_top (void) +This function is similar to calling @code{__builtin_frame_address} +with a value of @code{0}, but it returns the stack address when the +function is called. Unlike @code{__builtin_frame_address}, the frame +pointer register is kept only when necessary. +@end deftypefn + @node Vector Extensions @section Using Vector Instructions through Built-in Functions
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index f95646c..e2cd480 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -11483,6 +11483,11 @@ argument list due to stack realignment. Return @code{NULL [...] is needed. @end deftypefn
+@deftypefn {Target Hook} rtx TARGET_STACK_TOP_RTX (void) +This hook should return an rtx for the stack address when the function +is called. +@end deftypefn + @deftypefn {Target Hook} bool TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS (void) When optimization is disabled, this hook indicates whether or not arguments should be allocated to stack slots. Normally, GCC allocates diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 2383fb9..9167069 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -8181,6 +8181,8 @@ and the associated definitions of those functions.
@hook TARGET_GET_DRAP_RTX
+@hook TARGET_STACK_TOP_RTX + @hook TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
@hook TARGET_CONST_ANCHOR diff --git a/gcc/function.h b/gcc/function.h index e92c17c..dd1c38a 100644 --- a/gcc/function.h +++ b/gcc/function.h @@ -378,6 +378,9 @@ struct GTY(()) function {
/* Set when the tail call has been identified. */ unsigned int tail_call_marked : 1; + + /* Set when the address of the stack top has been taken. */ + unsigned int stack_top_taken : 1; };
/* Add the decl D to the local_decls list of FUN. */ diff --git a/gcc/ipa-pure-const.c b/gcc/ipa-pure-const.c index 8fd8c36..2405082 100644 --- a/gcc/ipa-pure-const.c +++ b/gcc/ipa-pure-const.c @@ -480,6 +480,7 @@ special_builtin_state (enum pure_const_state_e *state, bool *looping, case BUILT_IN_CXA_END_CLEANUP: case BUILT_IN_EH_COPY_VALUES: case BUILT_IN_FRAME_ADDRESS: + case BUILT_IN_STACK_TOP: case BUILT_IN_APPLY: case BUILT_IN_APPLY_ARGS: *looping = false; diff --git a/gcc/target.def b/gcc/target.def index 4edc209..7a30f39 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -4525,6 +4525,13 @@ argument list due to stack realignment. Return @code{NULL} [...] is needed.", rtx, (void), NULL)
+/* Get the stack address when the function is called. */ +DEFHOOK +(stack_top_rtx, + "This hook should return an rtx for the stack address when the function\n\ +is called.", + rtx, (void), NULL) + /* Return true if all function parameters should be spilled to the stack. */ DEFHOOK diff --git a/gcc/targhooks.c b/gcc/targhooks.c index 3eca47e..f188272 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -1926,4 +1926,13 @@ can_use_doloop_if_innermost (const widest_int &, const wides [...] return loop_depth == 1; }
+/* Get the stack address when the function is called. After the + prologue, stack top is at -WORD(AP) in the current frame. */ + +rtx +default_stack_top_rtx (void) +{ + return plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD); +} + #include "gt-targhooks.h" diff --git a/gcc/targhooks.h b/gcc/targhooks.h index 5ae991d..094a589 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -240,4 +240,7 @@ extern void default_setup_incoming_vararg_bounds (cumulative_ar [...] tree type ATTRIBUTE_UNUSED, int *pretend_arg_size ATTRIBUTE_UNUSED, int second_time ATTRIBUTE_UNUSED); + +extern rtx default_stack_top_rtx (void); + #endif /* GCC_TARGHOOKS_H */ diff --git a/gcc/testsuite/gcc.target/i386/pr66960-1.c b/gcc/testsuite/gcc.target/i [...] new file mode 100644 index 0000000..aaab3cf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr66960-1.c @@ -0,0 +1,33 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fomit-frame-pointer" { target { lp64 } } } */ +/* { dg-options "-O2 -fomit-frame-pointer -maddress-mode=short" { target { x32 } } } */ +/* { dg-options "-O2 -fomit-frame-pointer -miamcu" { target { ia32 } } } */ + +extern char **environ; +extern void exit (int status); +extern int main (long argc, char **argv, char **envp); + +void +_start (void) +{ + void *argc_p = __builtin_stack_top (); + char **argv = (char **) (argc_p + sizeof (void *)); + long argc = *(long *) argc_p; + int status; + + environ = argv + argc + 1; + + status = main (argc, argv, environ); + + exit (status); +} + +/* { dg-final { scan-assembler "movq[ \t]8\(%rsp\), %rdi" { target lp64 } } } */ +/* { dg-final { scan-assembler "leaq[ \t]16\(%rsp\), %rsi" { target lp64 } } } */ +/* { dg-final { scan-assembler "leaq[ \t]24\(%rsp,%rdi,8\), %rdx" { target lp6 [...] 
+/* { dg-final { scan-assembler "movl[ \t]8\(%esp\), %edi" { target x32 } } } */ +/* { dg-final { scan-assembler "leal[ \t]12\(%rsp\), %esi" { target x32 } } } */ +/* { dg-final { scan-assembler "leal[ \t]4\(%rsi,%rdi,4\), %edx" { target x32 [...] +/* { dg-final { scan-assembler "movl[ \t]\(%esp\), %eax" { target ia32 } } } */ +/* { dg-final { scan-assembler "leal[ \t]4\(%esp\), %edx" { target ia32 } } } */ +/* { dg-final { scan-assembler "leal[ \t]8\(%esp,%eax,4\), %ecx" { target ia32 [...] diff --git a/gcc/testsuite/gcc.target/i386/pr66960-2.c b/gcc/testsuite/gcc.target/i [...] new file mode 100644 index 0000000..b9dbde2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr66960-2.c @@ -0,0 +1,33 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fno-omit-frame-pointer" { target { lp64 } } } */ +/* { dg-options "-O2 -fno-omit-frame-pointer -maddress-mode=short" { target { x32 [...] +/* { dg-options "-O2 -fno-omit-frame-pointer -miamcu" { target { ia32 } } } */ + +extern char **environ; +extern void exit (int status); +extern int main (long argc, char **argv, char **envp); + +void +_start (void) +{ + void *argc_p = __builtin_stack_top (); + char **argv = (char **) (argc_p + sizeof (void *)); + long argc = *(long *) argc_p; + int status; + + environ = argv + argc + 1; + + status = main (argc, argv, environ); + + exit (status); +} + +/* { dg-final { scan-assembler "movq[ \t]8\(%rbp\), %rdi" { target lp64 } } } */ +/* { dg-final { scan-assembler "leaq[ \t]16\(%rbp\), %rsi" { target lp64 } } } */ +/* { dg-final { scan-assembler "leaq[ \t]24\(%rbp,%rdi,8\), %rdx" { target lp6 [...] +/* { dg-final { scan-assembler "movl[ \t]8\(%ebp\), %edi" { target x32 } } } */ +/* { dg-final { scan-assembler "leal[ \t]12\(%rbp\), %esi" { target x32 } } } */ +/* { dg-final { scan-assembler "leal[ \t]4\(%rsi,%rdi,4\), %edx" { target x32 [...] 
+/* { dg-final { scan-assembler "movl[ \t]4\(%ebp\), %eax" { target ia32 } } } */ +/* { dg-final { scan-assembler "leal[ \t]8\(%ebp\), %edx" { target ia32 } } } */ +/* { dg-final { scan-assembler "leal[ \t]12\(%ebp,%eax,4\), %ecx" { target ia3 [...] diff --git a/gcc/testsuite/gcc.target/i386/pr66960-3.c b/gcc/testsuite/gcc.target/i [...] new file mode 100644 index 0000000..48cf25e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr66960-3.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -mno-accumulate-outgoing-args" { target { lp64 } } } */ +/* { dg-options "-O2 -mno-accumulate-outgoing-args -maddress-mode=short" { target [...] +/* { dg-options "-O2 -mno-accumulate-outgoing-args -miamcu" { target { ia32 } } } */ + +extern void abort (void); +extern int check_int (int *i, int align); +typedef int aligned __attribute__((aligned(64))); + +void * +foo (void) +{ + aligned j; + if (check_int (&j, __alignof__(j)) != j) + abort (); + return __builtin_stack_top (); +} /* { dg-message "sorry, unimplemented: .__builtin_stack_top. not supported" } */ diff --git a/gcc/testsuite/gcc.target/i386/pr66960-4.c b/gcc/testsuite/gcc.target/i [...] new file mode 100644 index 0000000..44c0b26 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr66960-4.c @@ -0,0 +1,21 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -maccumulate-outgoing-args" { target { lp64 } } } */ +/* { dg-options "-O2 -maccumulate-outgoing-args -maddress-mode=short" { target { x [...] 
+/* { dg-options "-O2 -maccumulate-outgoing-args -miamcu" { target { ia32 } } } */ + +extern void abort (void); +extern int check_int (int *i, int align); +typedef int aligned __attribute__((aligned(64))); + +void * +foo (void) +{ + aligned j; + if (check_int (&j, __alignof__(j)) != j) + abort (); + return __builtin_stack_top (); +} + +/* { dg-final { scan-assembler "leaq[ \t]8\(%rbp\), %rax" { target lp64 } } } */ +/* { dg-final { scan-assembler "leal[ \t]8\(%rbp\), %eax" { target x32 } } } */ +/* { dg-final { scan-assembler "leal[ \t]4\(%ebp\), %eax" { target ia32 } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr66960-5.c b/gcc/testsuite/gcc.target/i [...] new file mode 100644 index 0000000..d449437 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr66960-5.c @@ -0,0 +1,21 @@ +/* { dg-do link } */ +/* { dg-options "-O" } */ + +extern void link_error (void); + +__attribute__ ((noinline, noclone)) +void +foo (void) +{ + void **p = __builtin_stack_top (); + void *ra = __builtin_return_address (0); + if (*p != ra) + link_error (); +} + +int +main (void) +{ + foo (); + return 0; +}