Identified regression caused by *gcc:01b5038718056b024b370b74a874fbd92c5bbab3*:
commit 01b5038718056b024b370b74a874fbd92c5bbab3
Author: Aldy Hernandez <aldyh(a)redhat.com>
Disable threading through latches until after loop optimizations.
Results regressed to (for first_bad == 01b5038718056b024b370b74a874fbd92c5bbab3)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
-5
# true:
0
# benchmark -- -Os artifacts/build-01b5038718056b024b370b74a874fbd92c5bbab3/results_id:
1
# 459.GemsFDTD,GemsFDTD_base.default regressed by 102
# 464.h264ref,h264ref_base.default regressed by 102
from (for last_good == fb88bf9931f17d137eb50c001e1c924aa1e34e83)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
-5
# true:
0
# benchmark -- -Os artifacts/build-fb88bf9931f17d137eb50c001e1c924aa1e34e83/results_id:
1
This commit has regressed these CI configurations:
- tcwg_bmk_gnu_apm/gnu-master-aarch64-spec2k6-Os
Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_apm-gnu-master-aa…
Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_apm-gnu-master-aa…
Even more details: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_apm-gnu-master-aa…
Reproduce builds:
<cut>
mkdir investigate-gcc-01b5038718056b024b370b74a874fbd92c5bbab3
cd investigate-gcc-01b5038718056b024b370b74a874fbd92c5bbab3
# Fetch scripts
git clone https://git.linaro.org/toolchain/jenkins-scripts
# Fetch manifests and test.sh script
mkdir -p artifacts/manifests
curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_apm-gnu-master-aa… --fail
curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_apm-gnu-master-aa… --fail
curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_apm-gnu-master-aa… --fail
chmod +x artifacts/test.sh
# Reproduce the baseline build (build all pre-requisites)
./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh
# Save baseline build state (which is then restored in artifacts/test.sh)
mkdir -p ./bisect
rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /gcc/ ./ ./bisect/baseline/
cd gcc
# Reproduce first_bad build
git checkout --detach 01b5038718056b024b370b74a874fbd92c5bbab3
../artifacts/test.sh
# Reproduce last_good build
git checkout --detach fb88bf9931f17d137eb50c001e1c924aa1e34e83
../artifacts/test.sh
cd ..
</cut>
Full commit (up to 1000 lines):
<cut>
commit 01b5038718056b024b370b74a874fbd92c5bbab3
Author: Aldy Hernandez <aldyh(a)redhat.com>
Date: Thu Sep 9 20:30:28 2021 +0200
Disable threading through latches until after loop optimizations.
The motivation for this patch was enabling the use of global ranges in
the path solver, but this caused certain properties of loops to be
destroyed, which made subsequent loop optimizations fail.
Consequently, this patch's main goal is to disable jump threading
involving the latch until after loop optimizations have run.
As can be seen in the test adjustments, we mostly shift the threading
from the early threaders (ethread, thread[12]) to the late threaders
(thread[34]). I have nuked some of the early notes in the testcases
that came as part of the jump threader rewrite. They're mostly noise
now.
Note that we could probably relax some other restrictions in
profitable_path_p when loop optimizations have completed, but it would
require more testing, and I'm hesitant to touch more things than needed
at this point. I have added a reminder to the function to keep this
in mind.
Finally, perhaps as a follow-up, we should apply the same restrictions to
the forward threader. At some point I'd like to combine the cost models.
Tested on x86-64 Linux.
p.s. There is a thorough discussion involving the limitations of jump
threading involving loops here:
https://gcc.gnu.org/pipermail/gcc/2021-September/237247.html
gcc/ChangeLog:
* tree-pass.h (PROP_loop_opts_done): New.
* gimple-range-path.cc (path_range_query::internal_range_of_expr):
Intersect with global range.
* tree-ssa-loop.c (tree_ssa_loop_done): Set PROP_loop_opts_done.
* tree-ssa-threadbackward.c
(back_threader_profitability::profitable_path_p): Disable
threading through latches until after loop optimizations have run.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/ssa-dom-thread-2b.c: Adjust for disabling of
threading through latches.
* gcc.dg/tree-ssa/ssa-dom-thread-6.c: Same.
* gcc.dg/tree-ssa/ssa-dom-thread-7.c: Same.
Co-authored-by: Michael Matz <matz(a)suse.de>
---
gcc/gimple-range-path.cc | 3 ++
gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2b.c | 4 +--
gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c | 37 ++---------------------
gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c | 17 +----------
gcc/tree-pass.h | 2 ++
gcc/tree-ssa-loop.c | 2 +-
gcc/tree-ssa-threadbackward.c | 28 +++++++++++++++--
7 files changed, 37 insertions(+), 56 deletions(-)
diff --git a/gcc/gimple-range-path.cc b/gcc/gimple-range-path.cc
index a4fa3b296ff..c616b65756f 100644
--- a/gcc/gimple-range-path.cc
+++ b/gcc/gimple-range-path.cc
@@ -127,6 +127,9 @@ path_range_query::internal_range_of_expr (irange &r, tree name, gimple *stmt)
basic_block bb = stmt ? gimple_bb (stmt) : exit_bb ();
if (stmt && range_defined_in_block (r, name, bb))
{
+ if (TREE_CODE (name) == SSA_NAME)
+ r.intersect (gimple_range_global (name));
+
set_cache (r, name);
return true;
}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2b.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2b.c
index e1c33e86cd7..823ada982ff 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2b.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2b.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-thread1-stats -fdump-tree-dom2-stats -fdisable-tree-ethread" } */
+/* { dg-options "-O2 -fdump-tree-thread3-stats -fdump-tree-dom2-stats -fdisable-tree-ethread" } */
void foo();
void bla();
@@ -26,4 +26,4 @@ void thread_latch_through_header (void)
case. And we want to thread through the header as well. These
are both caught by threading in DOM. */
/* { dg-final { scan-tree-dump-not "Jumps threaded" "dom2"} } */
-/* { dg-final { scan-tree-dump-times "Jumps threaded: 1" 1 "thread1"} } */
+/* { dg-final { scan-tree-dump-times "Jumps threaded: 1" 1 "thread3"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c
index c7bf867b084..ee46759bacc 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c
@@ -1,41 +1,8 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-thread1-details -fdump-tree-thread2-details" } */
+/* { dg-options "-O2 -fdump-tree-thread1-details -fdump-tree-thread3-details" } */
-/* All the threads in the thread1 dump start on a X->BB12 edge, as can
- be seen in the dump:
-
- Registering FSM jump thread: (x, 12) incoming edge; ...
- etc
- etc
-
- Before the new evrp, we were threading paths that started at the
- following edges:
-
- Registering FSM jump thread: (10, 12) incoming edge
- Registering FSM jump thread: (6, 12) incoming edge
- Registering FSM jump thread: (9, 12) incoming edge
-
- This was because the PHI at BB12 had constant values coming in from
- BB10, BB6, and BB9:
-
- # state_10 = PHI <state_11(7), 0(10), state_11(5), 1(6), state_11(8), 2(9), state_11(11)>
-
- Now with the new evrp, we get:
-
- # state_10 = PHI <0(7), 0(10), state_11(5), 1(6), 0(8), 2(9), 1(11)>
-
- Thus, we have 3 more paths that are known to be constant and can be
- threaded. Which means that by the second threading pass, we can
- only find one profitable path.
-
- For the record, all these extra constants are better paths coming
- out of switches. For example:
-
- SWITCH_BB -> BBx -> BBy -> BBz -> PHI
-
- We now know the value of the switch index at PHI. */
/* { dg-final { scan-tree-dump-times "Registering FSM jump" 6 "thread1" } } */
-/* { dg-final { scan-tree-dump-times "Registering FSM jump" 1 "thread2" } } */
+/* { dg-final { scan-tree-dump-times "Registering FSM jump" 1 "thread3" } } */
int sum0, sum1, sum2, sum3;
int foo (char *s, char **ret)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c
index 5fc2145a432..ba07942f9dd 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c
@@ -1,23 +1,8 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-thread1-stats -fdump-tree-thread2-stats -fdump-tree-dom2-stats -fdump-tree-thread3-stats -fdump-tree-dom3-stats -fdump-tree-vrp2-stats -fno-guess-branch-probability" } */
-/* Here we have the same issue as was commented in ssa-dom-thread-6.c.
- The PHI coming into the threader has a lot more constants, so the
- threader can thread more paths.
-
-$ diff clean/a.c.105t.mergephi2 a.c.105t.mergephi2
-252c252
-< # s_50 = PHI <s_49(10), 5(14), s_51(18), s_51(22), 1(26), 1(29), 1(31), s_51(5), 4(12), 1(15), 5(17), 1(19), 3(21), 1(23), 6(25), 7(28), s_51(30)>
----
-> # s_50 = PHI <s_49(10), 5(14), 4(18), 5(22), 1(26), 1(29), 1(31), s_51(5), 4(12), 1(15), 5(17), 1(19), 3(21), 1(23), 6(25), 7(28), 7(30)>
-272a273
-
- I spot checked a few and they all have the same pattern. We are
- basically tracking the switch index better through multiple
- paths. */
-
/* { dg-final { scan-tree-dump "Jumps threaded: 18" "thread1" } } */
-/* { dg-final { scan-tree-dump "Jumps threaded: 8" "thread2" } } */
+/* { dg-final { scan-tree-dump "Jumps threaded: 8" "thread3" } } */
/* { dg-final { scan-tree-dump-not "Jumps threaded" "dom2" } } */
/* aarch64 has the highest CASE_VALUES_THRESHOLD in GCC. It's high enough
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 83941bc0cee..eb75eb17951 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -225,6 +225,8 @@ protected:
been optimized. */
#define PROP_gimple_lomp_dev (1 << 16) /* done omp_device_lower */
#define PROP_rtl_split_insns (1 << 17) /* RTL has insns split. */
+#define PROP_loop_opts_done (1 << 18) /* SSA loop optimizations
+ have completed. */
#define PROP_gimple \
(PROP_gimple_any | PROP_gimple_lcf | PROP_gimple_leh | PROP_gimple_lomp)
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index 0cc4b3bbccf..1bbf2f1fb2c 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -540,7 +540,7 @@ const pass_data pass_data_tree_loop_done =
OPTGROUP_LOOP, /* optinfo_flags */
TV_NONE, /* tv_id */
PROP_cfg, /* properties_required */
- 0, /* properties_provided */
+ PROP_loop_opts_done, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_cleanup_cfg, /* todo_flags_finish */
diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c
index 449232c7715..e72992328de 100644
--- a/gcc/tree-ssa-threadbackward.c
+++ b/gcc/tree-ssa-threadbackward.c
@@ -43,6 +43,7 @@ along with GCC; see the file COPYING3. If not see
#include "ssa.h"
#include "tree-cfgcleanup.h"
#include "tree-pretty-print.h"
+#include "cfghooks.h"
// Path registry for the backwards threader. After all paths have been
// registered with register_path(), thread_through_all_blocks() is called
@@ -564,7 +565,10 @@ back_threader_registry::thread_through_all_blocks (bool may_peel_loop_headers)
TAKEN_EDGE, otherwise it is NULL.
CREATES_IRREDUCIBLE_LOOP, if non-null is set to TRUE if threading this path
- would create an irreducible loop. */
+ would create an irreducible loop.
+
+ ?? It seems we should be able to loosen some of the restrictions in
+ this function after loop optimizations have run. */
bool
back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
@@ -725,7 +729,11 @@ back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
the last entry in the array when determining if we thread
through the loop latch. */
if (loop->latch == bb)
- threaded_through_latch = true;
+ {
+ threaded_through_latch = true;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, " (latch)");
+ }
}
gimple *stmt = get_gimple_control_stmt (m_path[0]);
@@ -845,6 +853,22 @@ back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
"a multiway branch.\n");
return false;
}
+
+ /* Threading through an empty latch would cause code to be added to
+ the latch. This could alter the loop form sufficiently to cause
+ loop optimizations to fail. Disable these threads until after
+ loop optimizations have run. */
+ if ((threaded_through_latch
+ || (taken_edge && taken_edge->dest == loop->latch))
+ && !(cfun->curr_properties & PROP_loop_opts_done)
+ && empty_block_p (loop->latch))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ " FAIL: FSM Thread through latch before loop opts would create non-empty latch\n");
+ return false;
+
+ }
return true;
}
</cut>
Progress
* UM-2 [QEMU upstream maintainership]
+ Respin of a linux-user cleanup patchset
+ Code review, as usual
* QEMU-406 [QEMU support for MVE (M-profile Vector Extension; Helium)]
+ Working on version 2 of the "optimized code gen for MVE" patchset;
this now covers all the insns that have an easy optimized version.
-- PMM
Successfully identified regression in *linux* in CI configuration tcwg_kernel/gnu-master-arm-mainline-allmodconfig. So far, this commit has regressed CI configurations:
- tcwg_kernel/gnu-master-arm-mainline-allmodconfig
Culprit:
<cut>
commit 3fe617ccafd6f5bb33c2391d6f4eeb41c1fd0151
Author: Linus Torvalds <torvalds(a)linux-foundation.org>
Date: Sun Sep 5 11:24:05 2021 -0700
Enable '-Werror' by default for all kernel builds
... but make it a config option so that broken environments can disable
it when required.
We really should always have a clean build, and will disable specific
over-eager warnings as required, if we can't fix them. But while I
fairly religiously enforce that in my own tree, it doesn't get enforced
by various build robots that don't necessarily report warnings.
So this just makes '-Werror' a default compiler flag, but allows people
to disable it for their configuration if they have some particular
issues.
Occasionally, new compiler versions end up enabling new warnings, and it
can take a while before we have them fixed (or the warnings disabled if
that is what it takes), so the config option allows for that situation.
Hopefully this will mean that I get fewer pull requests that have new
warnings that were not noticed by various automation we have in place.
Knock wood.
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
</cut>
Results regressed to (for first_bad == 3fe617ccafd6f5bb33c2391d6f4eeb41c1fd0151)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1:
-5
# build_abe qemu:
-2
# linux_n_obj:
21769
# First few build errors in logs:
from (for last_good == fd47ff55c9c31101fcc06d20cb381da3d4089bd5)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1:
-5
# build_abe qemu:
-2
# linux_n_obj:
29880
# linux build successful:
all
Artifacts of last_good build: https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-mainline-al…
Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-mainline-al…
Build top page/logs: https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-mainline-al…
Configuration details:
Reproduce builds:
<cut>
mkdir investigate-linux-3fe617ccafd6f5bb33c2391d6f4eeb41c1fd0151
cd investigate-linux-3fe617ccafd6f5bb33c2391d6f4eeb41c1fd0151
git clone https://git.linaro.org/toolchain/jenkins-scripts
mkdir -p artifacts/manifests
curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-mainline-al… --fail
curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-mainline-al… --fail
curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-mainline-al… --fail
chmod +x artifacts/test.sh
# Reproduce the baseline build (build all pre-requisites)
./jenkins-scripts/tcwg_kernel-build.sh @@ artifacts/manifests/build-baseline.sh
# Save baseline build state (which is then restored in artifacts/test.sh)
mkdir -p ./bisect
rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /linux/ ./ ./bisect/baseline/
cd linux
# Reproduce first_bad build
git checkout --detach 3fe617ccafd6f5bb33c2391d6f4eeb41c1fd0151
../artifacts/test.sh
# Reproduce last_good build
git checkout --detach fd47ff55c9c31101fcc06d20cb381da3d4089bd5
../artifacts/test.sh
cd ..
</cut>
History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/…
Artifacts: https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-mainline-al…
Build log: https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-mainline-al…
Full commit (up to 1000 lines):
<cut>
commit 3fe617ccafd6f5bb33c2391d6f4eeb41c1fd0151
Author: Linus Torvalds <torvalds(a)linux-foundation.org>
Date: Sun Sep 5 11:24:05 2021 -0700
Enable '-Werror' by default for all kernel builds
... but make it a config option so that broken environments can disable
it when required.
We really should always have a clean build, and will disable specific
over-eager warnings as required, if we can't fix them. But while I
fairly religiously enforce that in my own tree, it doesn't get enforced
by various build robots that don't necessarily report warnings.
So this just makes '-Werror' a default compiler flag, but allows people
to disable it for their configuration if they have some particular
issues.
Occasionally, new compiler versions end up enabling new warnings, and it
can take a while before we have them fixed (or the warnings disabled if
that is what it takes), so the config option allows for that situation.
Hopefully this will mean that I get fewer pull requests that have new
warnings that were not noticed by various automation we have in place.
Knock wood.
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
---
Makefile | 3 +++
init/Kconfig | 14 ++++++++++++++
2 files changed, 17 insertions(+)
diff --git a/Makefile b/Makefile
index 6bc1c5b17a62..d45fc2edf186 100644
--- a/Makefile
+++ b/Makefile
@@ -785,6 +785,9 @@ stackp-flags-$(CONFIG_STACKPROTECTOR_STRONG) := -fstack-protector-strong
KBUILD_CFLAGS += $(stackp-flags-y)
+KBUILD_CFLAGS-$(CONFIG_WERROR) += -Werror
+KBUILD_CFLAGS += $(KBUILD_CFLAGS-y)
+
ifdef CONFIG_CC_IS_CLANG
KBUILD_CPPFLAGS += -Qunused-arguments
# The kernel builds with '-std=gnu89' so use of GNU extensions is acceptable.
diff --git a/init/Kconfig b/init/Kconfig
index e708180e9a59..8cb97f141b70 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -137,6 +137,20 @@ config COMPILE_TEST
here. If you are a user/distributor, say N here to exclude useless
drivers to be distributed.
+config WERROR
+ bool "Compile the kernel with warnings as errors"
+ default y
+ help
+ A kernel build should not cause any compiler warnings, and this
+ enables the '-Werror' flag to enforce that rule by default.
+
+ However, if you have a new (or very old) compiler with odd and
+ unusual warnings, or you have some architecture with problems,
+ you may need to disable this config option in order to
+ successfully build the kernel.
+
+ If in doubt, say Y.
+
config UAPI_HEADER_TEST
bool "Compile test UAPI headers"
depends on HEADERS_INSTALL && CC_CAN_LINK
</cut>
Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O3_LTO. So far, this commit has regressed CI configurations:
- tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O3_LTO
Culprit:
<cut>
commit 19dc02e99f802922a3af69e802465bee0723b57a
Author: Nikita Popov <nikita.ppv(a)gmail.com>
Date: Sun Aug 22 18:15:55 2021 +0200
[MergeICmps] Allow sinking past non-load/store
This is a followup to D106591. MergeICmps currently only allows
sinking the loads past either instructions that don't write to
memory at all, or simple loads/stores that don't modify the memory
the loads access.
The "simple loads/stores" part of this check doesn't seem necessary
to me -- AA isModRef() already accurately models any operation
that may clobber the memory. For example, in the adjusted test case
the transform is still fine if the call to @foo() isn't readonly,
but inaccessiblememonly -- in both cases, the call cannot modify
the loaded memory.
Differential Revision: https://reviews.llvm.org/D108517
</cut>
Results regressed to (for first_bad == 19dc02e99f802922a3af69e802465bee0723b57a)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
-5
# build_llvm true:
-3
# true:
0
# benchmark -- -O3_LTO artifacts/build-19dc02e99f802922a3af69e802465bee0723b57a/results_id:
1
# 464.h264ref,h264ref_base.default regressed by 105
from (for last_good == da12d88b1c5fc42b49b92fcf94917ca489dd677f)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
-5
# build_llvm true:
-3
# true:
0
# benchmark -- -O3_LTO artifacts/build-da12d88b1c5fc42b49b92fcf94917ca489dd677f/results_id:
1
Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-…
Results ID of last_good: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O3_LTO/4822
Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-…
Results ID of first_bad: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O3_LTO/4807
Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-…
Configuration details:
Reproduce builds:
<cut>
mkdir investigate-llvm-19dc02e99f802922a3af69e802465bee0723b57a
cd investigate-llvm-19dc02e99f802922a3af69e802465bee0723b57a
git clone https://git.linaro.org/toolchain/jenkins-scripts
mkdir -p artifacts/manifests
curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail
curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail
curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail
chmod +x artifacts/test.sh
# Reproduce the baseline build (build all pre-requisites)
./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh
# Save baseline build state (which is then restored in artifacts/test.sh)
mkdir -p ./bisect
rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/
cd llvm
# Reproduce first_bad build
git checkout --detach 19dc02e99f802922a3af69e802465bee0723b57a
../artifacts/test.sh
# Reproduce last_good build
git checkout --detach da12d88b1c5fc42b49b92fcf94917ca489dd677f
../artifacts/test.sh
cd ..
</cut>
History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/…
Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-…
Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-…
Full commit (up to 1000 lines):
<cut>
commit 19dc02e99f802922a3af69e802465bee0723b57a
Author: Nikita Popov <nikita.ppv(a)gmail.com>
Date: Sun Aug 22 18:15:55 2021 +0200
[MergeICmps] Allow sinking past non-load/store
This is a followup to D106591. MergeICmps currently only allows
sinking the loads past either instructions that don't write to
memory at all, or simple loads/stores that don't modify the memory
the loads access.
The "simple loads/stores" part of this check doesn't seem necessary
to me -- AA isModRef() already accurately models any operation
that may clobber the memory. For example, in the adjusted test case
the transform is still fine if the call to @foo() isn't readonly,
but inaccessiblememonly -- in both cases, the call cannot modify
the loaded memory.
Differential Revision: https://reviews.llvm.org/D108517
---
llvm/lib/Transforms/Scalar/MergeICmps.cpp | 14 +-------------
.../Transforms/MergeICmps/X86/split-block-does-work.ll | 2 +-
2 files changed, 2 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index f13f24ad2027..34465c76dd3d 100644
--- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -66,15 +66,6 @@ namespace {
#define DEBUG_TYPE "mergeicmps"
-// Returns true if the instruction is a simple load or a simple store
-static bool isSimpleLoadOrStore(const Instruction *I) {
- if (const LoadInst *LI = dyn_cast<LoadInst>(I))
- return LI->isSimple();
- if (const StoreInst *SI = dyn_cast<StoreInst>(I))
- return SI->isSimple();
- return false;
-}
-
// A BCE atom "Binary Compare Expression Atom" represents an integer load
// that is a constant offset from a base value, e.g. `a` or `o.c` in the example
// at the top.
@@ -244,10 +235,7 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
// If this instruction may clobber the loads and is in middle of the BCE cmp
// block instructions, then bail for now.
if (Inst->mayWriteToMemory()) {
- // Bail if this is not a simple load or store
- if (!isSimpleLoadOrStore(Inst))
- return false;
- // Disallow stores that might alias the BCE operands
+ // Disallow instructions that might modify the BCE operands
MemoryLocation LLoc = MemoryLocation::get(Cmp.Lhs.LoadI);
MemoryLocation RLoc = MemoryLocation::get(Cmp.Rhs.LoadI);
if (isModSet(AA.getModRefInfo(Inst, LLoc)) ||
diff --git a/llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll b/llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll
index 0b9663f44980..1e341b92918d 100644
--- a/llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll
+++ b/llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll
@@ -3,7 +3,7 @@
%S = type { i32, i32, i32, i32 }
-declare void @foo(...) readonly
+declare void @foo(...) inaccessiblememonly
; We can split %entry and create a memcmp(16 bytes).
define zeroext i1 @opeq1(
</cut>
Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O2. So far, this commit has regressed CI configurations:
- tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O2
Culprit:
<cut>
commit d39d3a327b1303012370e47d991459ffbfce45ef
Author: Peyton, Jonathan L <jonathan.l.peyton(a)intel.com>
Date: Fri Aug 20 16:06:13 2021 -0500
[OpenMP][test] fix omp_get_wtime.c test to be more accommodating
The omp_get_wtime.c test fails intermittently if the recorded times are
off by too much which can happen when many tests are run in parallel.
Instead of failing if one timing is a little off, take average of 100
timings minus the 10 worst.
Differential Revision: https://reviews.llvm.org/D108488
</cut>
Results regressed to (for first_bad == d39d3a327b1303012370e47d991459ffbfce45ef)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
-5
# build_llvm true:
-3
# true:
0
# benchmark -- -O2 artifacts/build-d39d3a327b1303012370e47d991459ffbfce45ef/results_id:
1
# 447.dealII,dealII_base.default regressed by 105
from (for last_good == f77174d4b8cfba3c0a53c78e53edbbaf57e37fc5)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
-5
# build_llvm true:
-3
# true:
0
# benchmark -- -O2 artifacts/build-f77174d4b8cfba3c0a53c78e53edbbaf57e37fc5/results_id:
1
Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-…
Results ID of last_good: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O2/4734
Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-…
Results ID of first_bad: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O2/4757
Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-…
Configuration details:
Reproduce builds:
<cut>
mkdir investigate-llvm-d39d3a327b1303012370e47d991459ffbfce45ef
cd investigate-llvm-d39d3a327b1303012370e47d991459ffbfce45ef
git clone https://git.linaro.org/toolchain/jenkins-scripts
mkdir -p artifacts/manifests
curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail
curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail
curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail
chmod +x artifacts/test.sh
# Reproduce the baseline build (build all pre-requisites)
./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh
# Save baseline build state (which is then restored in artifacts/test.sh)
mkdir -p ./bisect
rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/
cd llvm
# Reproduce first_bad build
git checkout --detach d39d3a327b1303012370e47d991459ffbfce45ef
../artifacts/test.sh
# Reproduce last_good build
git checkout --detach f77174d4b8cfba3c0a53c78e53edbbaf57e37fc5
../artifacts/test.sh
cd ..
</cut>
History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/…
Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-…
Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-…
Full commit (up to 1000 lines):
<cut>
commit d39d3a327b1303012370e47d991459ffbfce45ef
Author: Peyton, Jonathan L <jonathan.l.peyton(a)intel.com>
Date: Fri Aug 20 16:06:13 2021 -0500
[OpenMP][test] fix omp_get_wtime.c test to be more accommodating
The omp_get_wtime.c test fails intermittently if the recorded times are
off by too much which can happen when many tests are run in parallel.
Instead of failing if one timing is a little off, take average of 100
timings minus the 10 worst.
Differential Revision: https://reviews.llvm.org/D108488
---
openmp/runtime/test/api/omp_get_wtime.c | 75 ++++++++++++++++++++++++++-------
1 file changed, 59 insertions(+), 16 deletions(-)
diff --git a/openmp/runtime/test/api/omp_get_wtime.c b/openmp/runtime/test/api/omp_get_wtime.c
index e2bb211e0ce4..a862e07fc5a2 100644
--- a/openmp/runtime/test/api/omp_get_wtime.c
+++ b/openmp/runtime/test/api/omp_get_wtime.c
@@ -4,30 +4,73 @@
#include "omp_testsuite.h"
#include "omp_my_sleep.h"
-int test_omp_get_wtime()
-{
+#define NTIMES 100
+
+// This is the error % threshold. Be generous with the error threshold since
+// this test may be run in parallel with many other tests it may throw off the
+// sleep timing.
+#define THRESHOLD 33.0
+
+double test_omp_get_wtime(double desired_wait_time) {
double start;
double end;
- double measured_time;
- double wait_time = 0.2;
start = 0;
end = 0;
start = omp_get_wtime();
- my_sleep (wait_time);
+ my_sleep(desired_wait_time);
end = omp_get_wtime();
- measured_time = end-start;
- return ((measured_time > 0.97 * wait_time) && (measured_time < 1.03 * wait_time)) ;
+ return end - start;
}
-int main()
-{
- int i;
- int num_failed=0;
+int compare_times(const void *lhs, const void *rhs) {
+ const double *a = (const double *)lhs;
+ const double *b = (const double *)rhs;
+ return *a - *b;
+}
+
+int main() {
+ int i, final_count;
+ double percent_off;
+ double *begin, *end, *ptr;
+ double wait_time = 0.01;
+ double average = 0.0;
+ double n = 0.0;
+ double *times = (double *)malloc(sizeof(double) * NTIMES);
+
+ // Get each timing
+ for (i = 0; i < NTIMES; i++) {
+ times[i] = test_omp_get_wtime(wait_time);
+ }
+
+ // Remove approx the "worst" tenth of the timings
+ qsort(times, NTIMES, sizeof(double), compare_times);
+ begin = times;
+ end = times + NTIMES;
+ for (i = 0; i < NTIMES / 10; ++i) {
+ if (i % 2 == 0)
+ begin++;
+ else
+ end--;
+ }
+
+ // Get the average of the remaining timings
+ for (ptr = begin, final_count = 0; ptr != end; ++ptr, ++final_count)
+ average += times[i];
+ average /= (double)final_count;
+ free(times);
+
+ // Calculate the percent off of desired wait time
+ percent_off = (average - wait_time) / wait_time * 100.0;
+ // Should always be positive, but just in case
+ if (percent_off < 0)
+ percent_off = -percent_off;
- for(i = 0; i < REPETITIONS; i++) {
- if(!test_omp_get_wtime()) {
- num_failed++;
- }
+ if (percent_off > (double)THRESHOLD) {
+ fprintf(stderr, "error: average of %d runs (%lf) is of by %lf%%\n", NTIMES,
+ average, percent_off);
+ return EXIT_FAILURE;
}
- return num_failed;
+ printf("pass: average of %d runs (%lf) is only off by %lf%%\n", NTIMES,
+ average, percent_off);
+ return EXIT_SUCCESS;
}
</cut>