Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tx1/llvm-release-aarch64-spec2k6-O3_LTO. So far, this commit has regressed CI configurations:
- tcwg_bmk_llvm_tx1/llvm-release-aarch64-spec2k6-O3_LTO
Culprit:
<cut>
commit 669ddd1e9b1226432b003dbba05b99f8e992285b
Author: Arthur Eubanks <aeubanks(a)google.com>
Date: Mon Jan 25 11:00:56 2021 -0800
Turn on the new pass manager by default
This turns on the new pass manager by default for the optimization pipeline in
Clang and ThinLTO in various LLD backends. This also makes uses of `opt
-instcombine` use the new pass manager (unless specifically opted out).
This does not affect the backend target-dependent codegen pipeline.
If this causes regressions, you can opt out of the new pass manager
either via the -DENABLE_EXPERIMENTAL_NEW_PASS_MANAGER=OFF CMake flag
while building LLVM, or via various compiler flags, e.g.
-flegacy-pass-manager for Clang or -Wl,--lto-legacy-pass-manager for
ELF LLD. Please file bugs for any regressions.
Major differences:
* The inliner works slightly differently
* -O1 does some amount of inlining
* LCSSA and LoopSimplify are run before all loop passes
* Loop unswitching is implemented slightly differently
* A new SpeculateAroundPHIs pass is added to the pipeline
https://lists.llvm.org/pipermail/llvm-dev/2021-January/148098.html
Reviewed By: asbirlea, ychen, MaskRay, echristo
Differential Revision: https://reviews.llvm.org/D95380
</cut>
Results regressed to (for first_bad == 669ddd1e9b1226432b003dbba05b99f8e992285b)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
-5
# build_llvm true:
-3
# true:
0
# benchmark -- -O3_LTO artifacts/build-669ddd1e9b1226432b003dbba05b99f8e992285b/results_id:
1
# 473.astar,astar_base.default regressed by 106
from (for last_good == b15cbaf5a03d0b32dbc32c37766e32ccf66e6c87)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
-5
# build_llvm true:
-3
# true:
0
# benchmark -- -O3_LTO artifacts/build-b15cbaf5a03d0b32dbc32c37766e32ccf66e6c87/results_id:
1
Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release…
Results ID of last_good: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-release-aarch64-spec2k6-O3_LTO/3543
Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release…
Results ID of first_bad: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-release-aarch64-spec2k6-O3_LTO/3539
Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release…
Configuration details:
Reproduce builds:
<cut>
mkdir investigate-llvm-669ddd1e9b1226432b003dbba05b99f8e992285b
cd investigate-llvm-669ddd1e9b1226432b003dbba05b99f8e992285b
git clone https://git.linaro.org/toolchain/jenkins-scripts
mkdir -p artifacts/manifests
curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail
curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail
curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail
chmod +x artifacts/test.sh
# Reproduce the baseline build (build all pre-requisites)
./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh
# Save baseline build state (which is then restored in artifacts/test.sh)
mkdir -p ./bisect
rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/
cd llvm
# Reproduce first_bad build
git checkout --detach 669ddd1e9b1226432b003dbba05b99f8e992285b
../artifacts/test.sh
# Reproduce last_good build
git checkout --detach b15cbaf5a03d0b32dbc32c37766e32ccf66e6c87
../artifacts/test.sh
cd ..
</cut>
History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/…
Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release…
Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release…
Full commit (up to 1000 lines):
<cut>
commit 669ddd1e9b1226432b003dbba05b99f8e992285b
Author: Arthur Eubanks <aeubanks(a)google.com>
Date: Mon Jan 25 11:00:56 2021 -0800
Turn on the new pass manager by default
This turns on the new pass manager by default for the optimization pipeline in
Clang and ThinLTO in various LLD backends. This also makes uses of `opt
-instcombine` use the new pass manager (unless specifically opted out).
This does not affect the backend target-dependent codegen pipeline.
If this causes regressions, you can opt out of the new pass manager
either via the -DENABLE_EXPERIMENTAL_NEW_PASS_MANAGER=OFF CMake flag
while building LLVM, or via various compiler flags, e.g.
-flegacy-pass-manager for Clang or -Wl,--lto-legacy-pass-manager for
ELF LLD. Please file bugs for any regressions.
Major differences:
* The inliner works slightly differently
* -O1 does some amount of inlining
* LCSSA and LoopSimplify are run before all loop passes
* Loop unswitching is implemented slightly differently
* A new SpeculateAroundPHIs pass is added to the pipeline
https://lists.llvm.org/pipermail/llvm-dev/2021-January/148098.html
Reviewed By: asbirlea, ychen, MaskRay, echristo
Differential Revision: https://reviews.llvm.org/D95380
---
llvm/CMakeLists.txt | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 1affc289e64b..f5298de9f7ca 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -688,8 +688,8 @@ else()
endif()
option(LLVM_ENABLE_PLUGINS "Enable plugin support" ${LLVM_ENABLE_PLUGINS_default})
-set(ENABLE_EXPERIMENTAL_NEW_PASS_MANAGER FALSE CACHE BOOL
- "Enable the experimental new pass manager by default.")
+set(ENABLE_EXPERIMENTAL_NEW_PASS_MANAGER TRUE CACHE BOOL
+ "Enable the new pass manager by default.")
include(HandleLLVMOptions)
</cut>
Successfully identified regression in *gcc* in CI configuration tcwg_bmk_gnu_tx1/gnu-release-aarch64-spec2k6-O2. So far, this commit has regressed CI configurations:
- tcwg_bmk_gnu_tx1/gnu-release-aarch64-spec2k6-O2
Culprit:
<cut>
commit df7c22831f1e48dba49479c5960c1c180d8eab2c
Author: Richard Sandiford <richard.sandiford(a)arm.com>
Date: Thu Nov 14 15:12:58 2019 +0000
Support vectorisation with mixed vector sizes
After previous patches, it's now possible to make the vectoriser
support multiple vector sizes in the same vector region, using
related_vector_mode to pick the right vector mode for a given
element mode. No port yet takes advantage of this, but I have
a follow-on patch for AArch64.
This patch also seemed like a good opportunity to add some more dump
messages: one to make it clear which vector size/mode was being used
when analysis passed or failed, and another to say when we've decided
to skip a redundant vector size/mode.
2019-11-14 Richard Sandiford <richard.sandiford(a)arm.com>
gcc/
* machmode.h (opt_machine_mode::operator==): New function.
(opt_machine_mode::operator!=): Likewise.
* tree-vectorizer.h (vec_info::vector_mode): Update comment.
(get_related_vectype_for_scalar_type): Delete.
(get_vectype_for_scalar_type_and_size): Declare.
* tree-vect-slp.c (vect_slp_bb_region): Print dump messages to say
whether analysis passed or failed, and with what vector modes.
Use related_vector_mode to check whether trying a particular
vector mode would be redundant with the autodetected mode,
and print a dump message if we decide to skip it.
* tree-vect-loop.c (vect_analyze_loop): Likewise.
(vect_create_epilog_for_reduction): Use
get_related_vectype_for_scalar_type instead of
get_vectype_for_scalar_type_and_size.
* tree-vect-stmts.c (get_vectype_for_scalar_type_and_size): Replace
with...
(get_related_vectype_for_scalar_type): ...this new function.
Take a starting/"prevailing" vector mode rather than a vector size.
Take an optional nunits argument, with the same meaning as for
related_vector_mode. Use related_vector_mode when not
auto-detecting a mode, falling back to mode_for_vector if no
target mode exists.
(get_vectype_for_scalar_type): Update accordingly.
(get_same_sized_vectype): Likewise.
* tree-vectorizer.c (get_vec_alignment_for_array_type): Likewise.
From-SVN: r278240
</cut>
Results regressed to (for first_bad == df7c22831f1e48dba49479c5960c1c180d8eab2c)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
-5
# true:
0
# benchmark -- -O2 artifacts/build-df7c22831f1e48dba49479c5960c1c180d8eab2c/results_id:
1
# 453.povray,[.] _ZN3povL24All_Sphere_IntersectionsEPNS_13Objec regressed by 114
# 482.sphinx3,[.] subvq_mgau_shortlist regressed by 112
from (for last_good == 7f52eb891b738337d5cf82c7c440a5eea8c7b0c9)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
-5
# true:
0
# benchmark -- -O2 artifacts/build-7f52eb891b738337d5cf82c7c440a5eea8c7b0c9/results_id:
1
Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a…
Results ID of last_good: tx1_64/tcwg_bmk_gnu_tx1/bisect-gnu-release-aarch64-spec2k6-O2/3483
Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a…
Results ID of first_bad: tx1_64/tcwg_bmk_gnu_tx1/bisect-gnu-release-aarch64-spec2k6-O2/3492
Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a…
Configuration details:
Reproduce builds:
<cut>
mkdir investigate-gcc-df7c22831f1e48dba49479c5960c1c180d8eab2c
cd investigate-gcc-df7c22831f1e48dba49479c5960c1c180d8eab2c
git clone https://git.linaro.org/toolchain/jenkins-scripts
mkdir -p artifacts/manifests
curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… --fail
curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… --fail
curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… --fail
chmod +x artifacts/test.sh
# Reproduce the baseline build (build all pre-requisites)
./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh
# Save baseline build state (which is then restored in artifacts/test.sh)
mkdir -p ./bisect
rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /gcc/ ./ ./bisect/baseline/
cd gcc
# Reproduce first_bad build
git checkout --detach df7c22831f1e48dba49479c5960c1c180d8eab2c
../artifacts/test.sh
# Reproduce last_good build
git checkout --detach 7f52eb891b738337d5cf82c7c440a5eea8c7b0c9
../artifacts/test.sh
cd ..
</cut>
History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/…
Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a…
Build log: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a…
Full commit (up to 1000 lines):
<cut>
commit df7c22831f1e48dba49479c5960c1c180d8eab2c
Author: Richard Sandiford <richard.sandiford(a)arm.com>
Date: Thu Nov 14 15:12:58 2019 +0000
Support vectorisation with mixed vector sizes
After previous patches, it's now possible to make the vectoriser
support multiple vector sizes in the same vector region, using
related_vector_mode to pick the right vector mode for a given
element mode. No port yet takes advantage of this, but I have
a follow-on patch for AArch64.
This patch also seemed like a good opportunity to add some more dump
messages: one to make it clear which vector size/mode was being used
when analysis passed or failed, and another to say when we've decided
to skip a redundant vector size/mode.
2019-11-14 Richard Sandiford <richard.sandiford(a)arm.com>
gcc/
* machmode.h (opt_machine_mode::operator==): New function.
(opt_machine_mode::operator!=): Likewise.
* tree-vectorizer.h (vec_info::vector_mode): Update comment.
(get_related_vectype_for_scalar_type): Delete.
(get_vectype_for_scalar_type_and_size): Declare.
* tree-vect-slp.c (vect_slp_bb_region): Print dump messages to say
whether analysis passed or failed, and with what vector modes.
Use related_vector_mode to check whether trying a particular
vector mode would be redundant with the autodetected mode,
and print a dump message if we decide to skip it.
* tree-vect-loop.c (vect_analyze_loop): Likewise.
(vect_create_epilog_for_reduction): Use
get_related_vectype_for_scalar_type instead of
get_vectype_for_scalar_type_and_size.
* tree-vect-stmts.c (get_vectype_for_scalar_type_and_size): Replace
with...
(get_related_vectype_for_scalar_type): ...this new function.
Take a starting/"prevailing" vector mode rather than a vector size.
Take an optional nunits argument, with the same meaning as for
related_vector_mode. Use related_vector_mode when not
auto-detecting a mode, falling back to mode_for_vector if no
target mode exists.
(get_vectype_for_scalar_type): Update accordingly.
(get_same_sized_vectype): Likewise.
* tree-vectorizer.c (get_vec_alignment_for_array_type): Likewise.
From-SVN: r278240
---
gcc/ChangeLog | 28 +++++++++++++++++++++++++
gcc/machmode.h | 3 +++
gcc/tree-vect-loop.c | 54 +++++++++++++++++++++++++++++++++++-------------
gcc/tree-vect-slp.c | 33 +++++++++++++++++++++++++----
gcc/tree-vect-stmts.c | 57 ++++++++++++++++++++++++++++++++++++---------------
gcc/tree-vectorizer.c | 2 +-
gcc/tree-vectorizer.h | 8 +++++---
7 files changed, 147 insertions(+), 38 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 41c94140b1a..680aa85121a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,31 @@
+2019-11-14 Richard Sandiford <richard.sandiford(a)arm.com>
+
+ * machmode.h (opt_machine_mode::operator==): New function.
+ (opt_machine_mode::operator!=): Likewise.
+ * tree-vectorizer.h (vec_info::vector_mode): Update comment.
+ (get_related_vectype_for_scalar_type): Delete.
+ (get_vectype_for_scalar_type_and_size): Declare.
+ * tree-vect-slp.c (vect_slp_bb_region): Print dump messages to say
+ whether analysis passed or failed, and with what vector modes.
+ Use related_vector_mode to check whether trying a particular
+ vector mode would be redundant with the autodetected mode,
+ and print a dump message if we decide to skip it.
+ * tree-vect-loop.c (vect_analyze_loop): Likewise.
+ (vect_create_epilog_for_reduction): Use
+ get_related_vectype_for_scalar_type instead of
+ get_vectype_for_scalar_type_and_size.
+ * tree-vect-stmts.c (get_vectype_for_scalar_type_and_size): Replace
+ with...
+ (get_related_vectype_for_scalar_type): ...this new function.
+ Take a starting/"prevailing" vector mode rather than a vector size.
+ Take an optional nunits argument, with the same meaning as for
+ related_vector_mode. Use related_vector_mode when not
+ auto-detecting a mode, falling back to mode_for_vector if no
+ target mode exists.
+ (get_vectype_for_scalar_type): Update accordingly.
+ (get_same_sized_vectype): Likewise.
+ * tree-vectorizer.c (get_vec_alignment_for_array_type): Likewise.
+
2019-11-14 Richard Sandiford <richard.sandiford(a)arm.com>
* tree-vect-stmts.c (vectorizable_call): Require the types
diff --git a/gcc/machmode.h b/gcc/machmode.h
index 6750833c2fe..a507ed66c3f 100644
--- a/gcc/machmode.h
+++ b/gcc/machmode.h
@@ -258,6 +258,9 @@ public:
bool exists () const;
template<typename U> bool exists (U *) const;
+ bool operator== (const T &m) const { return m_mode == m; }
+ bool operator!= (const T &m) const { return m_mode != m; }
+
private:
machine_mode m_mode;
};
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 213d620ed2c..e60c159d11a 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2435,6 +2435,17 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
res = vect_analyze_loop_2 (loop_vinfo, fatal, &n_stmts);
if (mode_i == 0)
autodetected_vector_mode = loop_vinfo->vector_mode;
+ if (dump_enabled_p ())
+ {
+ if (res)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "***** Analysis succeeded with vector mode %s\n",
+ GET_MODE_NAME (loop_vinfo->vector_mode));
+ else
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "***** Analysis failed with vector mode %s\n",
+ GET_MODE_NAME (loop_vinfo->vector_mode));
+ }
loop->aux = NULL;
if (res)
@@ -2501,9 +2512,22 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
}
if (mode_i < vector_modes.length ()
- && known_eq (GET_MODE_SIZE (vector_modes[mode_i]),
- GET_MODE_SIZE (autodetected_vector_mode)))
- mode_i += 1;
+ && VECTOR_MODE_P (autodetected_vector_mode)
+ && (related_vector_mode (vector_modes[mode_i],
+ GET_MODE_INNER (autodetected_vector_mode))
+ == autodetected_vector_mode)
+ && (related_vector_mode (autodetected_vector_mode,
+ GET_MODE_INNER (vector_modes[mode_i]))
+ == vector_modes[mode_i]))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "***** Skipping vector mode %s, which would"
+ " repeat the analysis for %s\n",
+ GET_MODE_NAME (vector_modes[mode_i]),
+ GET_MODE_NAME (autodetected_vector_mode));
+ mode_i += 1;
+ }
if (mode_i == vector_modes.length ()
|| autodetected_vector_mode == VOIDmode)
@@ -4898,13 +4922,14 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
halves against each other. */
enum machine_mode mode1 = mode;
tree stype = TREE_TYPE (vectype);
- unsigned sz = tree_to_uhwi (TYPE_SIZE_UNIT (vectype));
- unsigned sz1 = sz;
+ unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
+ unsigned nunits1 = nunits;
if (!slp_reduc
&& (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
- sz1 = GET_MODE_SIZE (mode1).to_constant ();
+ nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
- tree vectype1 = get_vectype_for_scalar_type_and_size (stype, sz1);
+ tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
+ stype, nunits1);
reduce_with_shift = have_whole_vector_shift (mode1);
if (!VECTOR_MODE_P (mode1))
reduce_with_shift = false;
@@ -4918,11 +4943,13 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
/* First reduce the vector to the desired vector size we should
do shift reduction on by combining upper and lower halves. */
new_temp = new_phi_result;
- while (sz > sz1)
+ while (nunits > nunits1)
{
gcc_assert (!slp_reduc);
- sz /= 2;
- vectype1 = get_vectype_for_scalar_type_and_size (stype, sz);
+ nunits /= 2;
+ vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
+ stype, nunits);
+ unsigned int bitsize = tree_to_uhwi (TYPE_SIZE (vectype1));
/* The target has to make sure we support lowpart/highpart
extraction, either via direct vector extract or through
@@ -4947,15 +4974,14 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
= gimple_build_assign (dst2, BIT_FIELD_REF,
build3 (BIT_FIELD_REF, vectype1,
new_temp, TYPE_SIZE (vectype1),
- bitsize_int (sz * BITS_PER_UNIT)));
+ bitsize_int (bitsize)));
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
}
else
{
/* Extract via punning to appropriately sized integer mode
vector. */
- tree eltype = build_nonstandard_integer_type (sz * BITS_PER_UNIT,
- 1);
+ tree eltype = build_nonstandard_integer_type (bitsize, 1);
tree etype = build_vector_type (eltype, 2);
gcc_assert (convert_optab_handler (vec_extract_optab,
TYPE_MODE (etype),
@@ -4984,7 +5010,7 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
= gimple_build_assign (tem, BIT_FIELD_REF,
build3 (BIT_FIELD_REF, eltype,
new_temp, TYPE_SIZE (eltype),
- bitsize_int (sz * BITS_PER_UNIT)));
+ bitsize_int (bitsize)));
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
dst2 = make_ssa_name (vectype1);
epilog_stmt = gimple_build_assign (dst2, VIEW_CONVERT_EXPR,
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 3885d9cbe4a..1e00db5a326 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -3203,7 +3203,12 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin,
&& dbg_cnt (vect_slp))
{
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "***** Analysis succeeded with vector mode"
+ " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode));
+ dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
+ }
bb_vinfo->shared->check_datarefs ();
vect_schedule_slp (bb_vinfo);
@@ -3223,6 +3228,13 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin,
vectorized = true;
}
+ else
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "***** Analysis failed with vector mode %s\n",
+ GET_MODE_NAME (bb_vinfo->vector_mode));
+ }
if (mode_i == 0)
autodetected_vector_mode = bb_vinfo->vector_mode;
@@ -3230,9 +3242,22 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin,
delete bb_vinfo;
if (mode_i < vector_modes.length ()
- && known_eq (GET_MODE_SIZE (vector_modes[mode_i]),
- GET_MODE_SIZE (autodetected_vector_mode)))
- mode_i += 1;
+ && VECTOR_MODE_P (autodetected_vector_mode)
+ && (related_vector_mode (vector_modes[mode_i],
+ GET_MODE_INNER (autodetected_vector_mode))
+ == autodetected_vector_mode)
+ && (related_vector_mode (autodetected_vector_mode,
+ GET_MODE_INNER (vector_modes[mode_i]))
+ == vector_modes[mode_i]))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "***** Skipping vector mode %s, which would"
+ " repeat the analysis for %s\n",
+ GET_MODE_NAME (vector_modes[mode_i]),
+ GET_MODE_NAME (autodetected_vector_mode));
+ mode_i += 1;
+ }
if (vectorized
|| mode_i == vector_modes.length ()
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 80f59accad7..36f832bb522 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -11138,18 +11138,28 @@ vect_remove_stores (stmt_vec_info first_stmt_info)
}
}
-/* Function get_vectype_for_scalar_type_and_size.
+/* If NUNITS is nonzero, return a vector type that contains NUNITS
+ elements of type SCALAR_TYPE, or null if the target doesn't support
+ such a type.
- Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
- by the target. */
+ If NUNITS is zero, return a vector type that contains elements of
+ type SCALAR_TYPE, choosing whichever vector size the target prefers.
+
+ If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
+ for this vectorization region and want to "autodetect" the best choice.
+ Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
+ and we want the new type to be interoperable with it. PREVAILING_MODE
+ in this case can be a scalar integer mode or a vector mode; when it
+ is a vector mode, the function acts like a tree-level version of
+ related_vector_mode. */
tree
-get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
+get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
+ tree scalar_type, poly_uint64 nunits)
{
tree orig_scalar_type = scalar_type;
scalar_mode inner_mode;
machine_mode simd_mode;
- poly_uint64 nunits;
tree vectype;
if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
@@ -11189,10 +11199,11 @@ get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
if (scalar_type == NULL_TREE)
return NULL_TREE;
- /* If no size was supplied use the mode the target prefers. Otherwise
- lookup a vector mode of the specified size. */
- if (known_eq (size, 0U))
+ /* If no prevailing mode was supplied, use the mode the target prefers.
+ Otherwise lookup a vector mode based on the prevailing mode. */
+ if (prevailing_mode == VOIDmode)
{
+ gcc_assert (known_eq (nunits, 0U));
simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
if (SCALAR_INT_MODE_P (simd_mode))
{
@@ -11208,9 +11219,19 @@ get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
return NULL_TREE;
}
}
- else if (!multiple_p (size, nbytes, &nunits)
- || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
- return NULL_TREE;
+ else if (SCALAR_INT_MODE_P (prevailing_mode)
+ || !related_vector_mode (prevailing_mode,
+ inner_mode, nunits).exists (&simd_mode))
+ {
+ /* Fall back to using mode_for_vector, mostly in the hope of being
+ able to use an integer mode. */
+ if (known_eq (nunits, 0U)
+ && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
+ return NULL_TREE;
+
+ if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
+ return NULL_TREE;
+ }
vectype = build_vector_type_for_mode (scalar_type, simd_mode);
@@ -11238,9 +11259,8 @@ get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
tree
get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type)
{
- tree vectype;
- poly_uint64 vector_size = GET_MODE_SIZE (vinfo->vector_mode);
- vectype = get_vectype_for_scalar_type_and_size (scalar_type, vector_size);
+ tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
+ scalar_type);
if (vectype && vinfo->vector_mode == VOIDmode)
vinfo->vector_mode = TYPE_MODE (vectype);
return vectype;
@@ -11273,8 +11293,13 @@ get_same_sized_vectype (tree scalar_type, tree vector_type)
if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
return truth_type_for (vector_type);
- return get_vectype_for_scalar_type_and_size
- (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
+ poly_uint64 nunits;
+ if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
+ GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
+ return NULL_TREE;
+
+ return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
+ scalar_type, nunits);
}
/* Function vect_is_simple_use.
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index d6de78350e6..7be81a0b27f 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -1359,7 +1359,7 @@ get_vec_alignment_for_array_type (tree type)
poly_uint64 array_size, vector_size;
tree scalar_type = strip_array_types (type);
- tree vectype = get_vectype_for_scalar_type_and_size (scalar_type, 0);
+ tree vectype = get_related_vectype_for_scalar_type (VOIDmode, scalar_type);
if (!vectype
|| !poly_int_tree_p (TYPE_SIZE (type), &array_size)
|| !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index f6efed1f863..fadc4d89d16 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -335,8 +335,9 @@ public:
/* Cost data used by the target cost model. */
void *target_cost_data;
- /* If we've chosen a vector size for this vectorization region,
- this is one mode that has such a size, otherwise it is VOIDmode. */
+ /* The argument we should pass to related_vector_mode when looking up
+ the vector mode for a scalar mode, or VOIDmode if we haven't yet
+ made any decisions about which vector modes to use. */
machine_mode vector_mode;
private:
@@ -1624,8 +1625,9 @@ extern bool vect_can_advance_ivs_p (loop_vec_info);
extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code);
/* In tree-vect-stmts.c. */
+extern tree get_related_vectype_for_scalar_type (machine_mode, tree,
+ poly_uint64 = 0);
extern tree get_vectype_for_scalar_type (vec_info *, tree);
-extern tree get_vectype_for_scalar_type_and_size (tree, poly_uint64);
extern tree get_mask_type_for_scalar_type (vec_info *, tree);
extern tree get_same_sized_vectype (tree, tree);
extern bool vect_get_loop_mask_type (loop_vec_info);
</cut>
VirtIO Initiative ([STR-9])
===========================
- posted Enabling hypervisor agnosticism for VirtIO backends
Message-Id: <87v94ldrqq.fsf(a)linaro.org>
VirtIO RPMB ([STR-5])
- made more progress and now have PROGRAM_KEY/WRITE_COUNTER done -
feels like it's getting faster
[hacking branch] <https://github.com/stsquad/virtio-rpmb/tree/hacking>
Fix VirtIO spec as per Rucha's email
------------------------------------
QEMU Upstream Work ([UM-2])
===========================
- posted [PATCH for 6.1-rc3 v1 0/4] gitlab and plugins pre-PR
Message-Id: <20210806141015.2487502-1-alex.bennee(a)linaro.org>
- prepared a potential [pull request for testing issues], but it looks
like it will have to wait for 6.2
[pull request for testing issues]
<https://github.com/stsquad/qemu/tree/pr/120821-for-6.1-rc4-1>
Write a generic overview of vhost user usage for the manual
Enable plugins by default on TCG builds
- [X] clean-up testing matrix
Completed Reviews [10/10]
=========================
[PATCH 00/13] new plugin argument passing scheme
Message-Id: <20210717100920.240793-1-ma.mandourr(a)gmail.com>
[PATCH 0/9] new plugin argument passing scheme
Message-Id: <20210716080345.136784-1-ma.mandourr(a)gmail.com>
[RFC PATCH] Subject: [RFC PATCH] plugins: Passed the parsed arguments directly to plugins
Message-Id: <20210623155553.481099-1-ma.mandourr(a)gmail.com>
[PATCH 3/6] plugins/cache: Fixed a use-after-free bug with multithreaded usermode
Message-Id: <20210714172151.8494-4-ma.mandourr(a)gmail.com>
[PATCH v8] tests/tcg/s390x: Test SIGILL and SIGSEGV handling
Message-Id: <20210804225146.154513-1-iii(a)linux.ibm.com>
[RFC PATCH v2] Add a post for the new TCG cache modelling plugin
Message-Id: <20210617121707.764126-1-ma.mandourr(a)gmail.com>
[PATCH for 6.1] plugins: do not limit exported symbols if modules are active
Message-Id: <20210811100550.54714-1-pbonzini(a)redhat.com>
[PATCH v4 00/13] new plugin argument passing scheme
Message-Id: <20210730135817.17816-1-ma.mandourr(a)gmail.com>
[PATCH 0/6] docs/devel: Organize devel manual into further subsections
Message-Id: <20210804005621.1577302-1-jsnow(a)redhat.com>
[PATCH] Makefile: Fix cscope issues on MacOS and soft links
Message-Id: <20210801171144.60412-1-peterx(a)redhat.com>
Absences
========
- Another partial week
- On holiday for rest of August
Current Review Queue
====================
TODO [PATCH v3] accel/tcg: Clear PAGE_WRITE before translation
Message-Id: <20210805204835.158918-1-iii(a)linux.ibm.com>
=====================================================================================================================
TODO [PATCH 0/7] tcg: some small towards more modular tcg
Message-Id: <20210804143826.3402872-1-kraxel(a)redhat.com>
=================================================================================================================
TODO [PATCH 0/2] Acceptance Tests: clean up of temporary dirs and MAINTAINERS entry
Message-Id: <20210803193447.3946219-1-crosa(a)redhat.com>
==========================================================================================================================================
TODO [PATCH v2 00/11] Atomic cleanup + clang-12 build fix
Message-Id: <20210717014121.1784956-1-richard.henderson(a)linaro.org>
============================================================================================================================
--
Alex Bennée
Progress:
* UM-2 [QEMU upstream maintainership]
+ Getting rc3 out of the door
+ Finished the systick timer refactoring series and sent it out for
review (it ended up weighing in at 25 patches...)
+ Worked through some Coverity issue reports to analyze them and
either close as false-positive or send out patches fixing them
-- PMM
I'm compiling and running a bare metal AArch64 bootloader using 3
different compilers: the Linaro / ARM GCC 10.3.1 compiler, the Linaro /
ARM GCC 10.2.1 compiler, and an in-house built GCC 10.2.0 compiler.
GDB single-steps correctly through binaries built with either of the GCC
10.2 compilers, but with a binary built using the Linaro / ARM-supplied GCC
10.3.1 it runs without halting when a step is requested - or perhaps steps
multiple instructions.
Eclipse CDT (v4.20 aka 2021-06) is able to correlate debugging
information from binaries built with either of the gcc 10.2 toolchains,
and to single step correctly through the program. Breakpoints work as
expected. Registers display fine.
Eclipse CDT is not able to correlate the current PC location with source
code when using the binary built with Linaro / ARM GCC 10.3; it brings up a
disassembly window instead. Breakpoints placed at assembly instructions in
the editor do not work.
I've tried three different GDB versions - ARM's supplied 10.2 and 10.3
GDB, and the in-house built GDB. Results are the same.
The same makefile is used to create the binaries, with just a few macro
definitions to switch. The only compiler flag of interest is
-march=armv8.2-a (and of course -g -O0). -mtune=cortex-a53 doesn't help.
The board is connected via JTAG using OpenOCD 0.11.0+ and an Olimex
ARM-USB-OCD-H adapter.
I'm building in a cygwin shell on Windows 10 version 21H1 using the
compilers:
gcc-arm-10.3-2021.07-mingw-w64-i686-aarch64-none-elf.tar.xz
gcc-arm-10.2-2020.11-mingw-w64-i686-aarch64-none-elf.tar.xz
downloaded from:
https://developer.arm.com/tools-and-software/open-source-software/developer…
Differences in compiler configuration (gcc -v) are:
Failing - Linaro / ARM GCC 10.3(.1):
--enable-checking=release
--target=aarch64-none-elf
--with-libiconv-prefix=/data/jenkins/workspace/GNU-toolchain/arm-10-4/build-mingw-aarch64-none-elf/host-tools
Working - in-house GCC 10.2.1:
--build=x86_64-w64-mingw32
--disable-libffi
--disable-libgomp
--disable-libmudflap
--disable-libssp
--disable-libstdcxx-pch
--disable-lto
--disable-win32-registry
--enable-multilib
--target=aarch64-elf
--with-gcc
--with-gnu-as
--with-gnu-ld
--with-host-libstdcxx='-static-libgcc -Wl,-Bstatic,-lstdc++,-Bdynamic -lm'
--with-multilib-list=lp64,ilp32
--with-stabs
--with-sysroot=/build/aarch64-elf_10.2.0/cross-gcc/aarch64-elf
--with-zstd=/build/aarch64-elf_10.2.0/host
Has anyone been able to perform hardware debugging of binaries built
with the latest 10.3 builds using GDB (and maybe even Eclipse CDT)?
Any suggestions as to other steps to try?
Thanks.
== This Week ==
* GNU-708 (Attribute to mark param as const)
- Created prototype patch
- Discussions on gcc mailing list
* PR66791 (replace builtins in intrinsics with vector extensions)
- Fixed issue with PR98435 test-case as suggested by Christophe
- Pinged patches for review.
== Next Week ==
- GNU-708, PR66791
Successfully identified regression in *gcc* in CI configuration tcwg_bmk_gnu_tk1/gnu-master-arm-spec2k6-O3_LTO. So far, this commit has regressed CI configurations:
- tcwg_bmk_gnu_tk1/gnu-master-arm-spec2k6-O3_LTO
Culprit:
<cut>
commit f31da42e047e8018ca6ad9809273bc7efb6ffcaf
Author: Richard Biener <rguenther(a)suse.de>
Date: Fri Aug 6 14:39:05 2021 +0200
tree-optimization/101801 - remove vect_worthwhile_without_simd_p
This removes the cost part of vect_worthwhile_without_simd_p, retaining
only the correctness bits. The reason is that the cost heuristic
do not properly account for SLP plus the check whether "without simd"
applies misfires for AVX512 mask vectors at the moment, leading to
missed vectorizations there.
Any costing decision should take place in the cost modeling, no
single stmt is to disable all vectorization on its own.
2021-08-06 Richard Biener <rguenther(a)suse.de>
PR tree-optimization/101801
* tree-vectorizer.h (vect_worthwhile_without_simd_p): Rename...
(vect_can_vectorize_without_simd_p): ... to this.
* tree-vect-loop.c (vect_worthwhile_without_simd_p): Rename...
(vect_can_vectorize_without_simd_p): ... to this and fold
in vect_min_worthwhile_factor.
(vect_min_worthwhile_factor): Remove.
(vectorizable_reduction): Adjust and remove the cost part.
* tree-vect-stmts.c (vectorizable_shift): Likewise.
(vectorizable_operation): Likewise.
</cut>
Results regressed to (for first_bad == f31da42e047e8018ca6ad9809273bc7efb6ffcaf)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer:
-5
# true:
0
# benchmark -- -O3_LTO_marm artifacts/build-f31da42e047e8018ca6ad9809273bc7efb6ffcaf/results_id:
1
# 482.sphinx3,sphinx_livepretend_base.default regressed by 105
from (for last_good == c2a984a3570b908a44a35e43bb48f0a05196156a)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer:
-5
# true:
0
# benchmark -- -O3_LTO_marm artifacts/build-c2a984a3570b908a44a35e43bb48f0a05196156a/results_id:
1
Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tk1-gnu-master-ar…
Results ID of last_good: tk1_32/tcwg_bmk_gnu_tk1/bisect-gnu-master-arm-spec2k6-O3_LTO/3203
Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tk1-gnu-master-ar…
Results ID of first_bad: tk1_32/tcwg_bmk_gnu_tk1/bisect-gnu-master-arm-spec2k6-O3_LTO/3211
Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tk1-gnu-master-ar…
Configuration details:
Reproduce builds:
<cut>
mkdir investigate-gcc-f31da42e047e8018ca6ad9809273bc7efb6ffcaf
cd investigate-gcc-f31da42e047e8018ca6ad9809273bc7efb6ffcaf
git clone https://git.linaro.org/toolchain/jenkins-scripts
mkdir -p artifacts/manifests
curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tk1-gnu-master-ar… --fail
curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tk1-gnu-master-ar… --fail
curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tk1-gnu-master-ar… --fail
chmod +x artifacts/test.sh
# Reproduce the baseline build (build all pre-requisites)
./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh
# Save baseline build state (which is then restored in artifacts/test.sh)
mkdir -p ./bisect
rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /gcc/ ./ ./bisect/baseline/
cd gcc
# Reproduce first_bad build
git checkout --detach f31da42e047e8018ca6ad9809273bc7efb6ffcaf
../artifacts/test.sh
# Reproduce last_good build
git checkout --detach c2a984a3570b908a44a35e43bb48f0a05196156a
../artifacts/test.sh
cd ..
</cut>
History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/…
Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tk1-gnu-master-ar…
Build log: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tk1-gnu-master-ar…
Full commit (up to 1000 lines):
<cut>
commit f31da42e047e8018ca6ad9809273bc7efb6ffcaf
Author: Richard Biener <rguenther(a)suse.de>
Date: Fri Aug 6 14:39:05 2021 +0200
tree-optimization/101801 - remove vect_worthwhile_without_simd_p
This removes the cost part of vect_worthwhile_without_simd_p, retaining
only the correctness bits. The reason is that the cost heuristic
do not properly account for SLP plus the check whether "without simd"
applies misfires for AVX512 mask vectors at the moment, leading to
missed vectorizations there.
Any costing decision should take place in the cost modeling, no
single stmt is to disable all vectorization on its own.
2021-08-06 Richard Biener <rguenther(a)suse.de>
PR tree-optimization/101801
* tree-vectorizer.h (vect_worthwhile_without_simd_p): Rename...
(vect_can_vectorize_without_simd_p): ... to this.
* tree-vect-loop.c (vect_worthwhile_without_simd_p): Rename...
(vect_can_vectorize_without_simd_p): ... to this and fold
in vect_min_worthwhile_factor.
(vect_min_worthwhile_factor): Remove.
(vectorizable_reduction): Adjust and remove the cost part.
* tree-vect-stmts.c (vectorizable_shift): Likewise.
(vectorizable_operation): Likewise.
---
gcc/tree-vect-loop.c | 43 +++++++------------------------------------
gcc/tree-vect-stmts.c | 26 ++------------------------
gcc/tree-vectorizer.h | 2 +-
3 files changed, 10 insertions(+), 61 deletions(-)
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 1e21fe6b13d..37c7daa7f9e 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -7227,24 +7227,13 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
if (dump_enabled_p ())
dump_printf (MSG_NOTE, "op not supported by target.\n");
if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
- || !vect_worthwhile_without_simd_p (loop_vinfo, code))
+ || !vect_can_vectorize_without_simd_p (code))
ok = false;
else
if (dump_enabled_p ())
dump_printf (MSG_NOTE, "proceeding using word mode.\n");
}
- /* Worthwhile without SIMD support? */
- if (ok
- && !VECTOR_MODE_P (TYPE_MODE (vectype_in))
- && !vect_worthwhile_without_simd_p (loop_vinfo, code))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not worthwhile without SIMD support.\n");
- ok = false;
- }
-
/* lane-reducing operations have to go through vect_transform_reduction.
For the other cases try without the single cycle optimization. */
if (!ok)
@@ -7948,46 +7937,28 @@ vectorizable_phi (vec_info *,
}
-/* Function vect_min_worthwhile_factor.
+/* Return true if we can emulate CODE on an integer mode representation
+ of a vector. */
- For a loop where we could vectorize the operation indicated by CODE,
- return the minimum vectorization factor that makes it worthwhile
- to use generic vectors. */
-static unsigned int
-vect_min_worthwhile_factor (enum tree_code code)
+bool
+vect_can_vectorize_without_simd_p (tree_code code)
{
switch (code)
{
case PLUS_EXPR:
case MINUS_EXPR:
case NEGATE_EXPR:
- return 4;
-
case BIT_AND_EXPR:
case BIT_IOR_EXPR:
case BIT_XOR_EXPR:
case BIT_NOT_EXPR:
- return 2;
+ return true;
default:
- return INT_MAX;
+ return false;
}
}
-/* Return true if VINFO indicates we are doing loop vectorization and if
- it is worth decomposing CODE operations into scalar operations for
- that loop's vectorization factor. */
-
-bool
-vect_worthwhile_without_simd_p (vec_info *vinfo, tree_code code)
-{
- loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
- unsigned HOST_WIDE_INT value;
- return (loop_vinfo
- && LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&value)
- && value >= vect_min_worthwhile_factor (code));
-}
-
/* Function vectorizable_induction
Check if STMT_INFO performs an induction computation that can be vectorized.
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 94bdb74ea8d..5b94d41e292 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -5685,24 +5685,13 @@ vectorizable_shift (vec_info *vinfo,
/* Check only during analysis. */
if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
|| (!vec_stmt
- && !vect_worthwhile_without_simd_p (vinfo, code)))
+ && !vect_can_vectorize_without_simd_p (code)))
return false;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"proceeding using word mode.\n");
}
- /* Worthwhile without SIMD support? Check only during analysis. */
- if (!vec_stmt
- && !VECTOR_MODE_P (TYPE_MODE (vectype))
- && !vect_worthwhile_without_simd_p (vinfo, code))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not worthwhile without SIMD support.\n");
- return false;
- }
-
if (!vec_stmt) /* transformation not required. */
{
if (slp_node
@@ -6094,24 +6083,13 @@ vectorizable_operation (vec_info *vinfo,
"op not supported by target.\n");
/* Check only during analysis. */
if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
- || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
+ || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
return false;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"proceeding using word mode.\n");
}
- /* Worthwhile without SIMD support? Check only during analysis. */
- if (!VECTOR_MODE_P (vec_mode)
- && !vec_stmt
- && !vect_worthwhile_without_simd_p (vinfo, code))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not worthwhile without SIMD support.\n");
- return false;
- }
-
int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
internal_fn cond_fn = get_conditional_internal_fn (code);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 5571b3cce3b..de0ecf86478 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2061,7 +2061,7 @@ extern bool vectorizable_lc_phi (loop_vec_info, stmt_vec_info,
gimple **, slp_tree);
extern bool vectorizable_phi (vec_info *, stmt_vec_info, gimple **, slp_tree,
stmt_vector_for_cost *);
-extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code);
+extern bool vect_can_vectorize_without_simd_p (tree_code);
extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
stmt_vector_for_cost *,
stmt_vector_for_cost *,
</cut>
Successfully identified regression in *gcc* in CI configuration tcwg_bmk_llvm_apm/llvm-release-arm-spec2k6-Os. So far, this commit has regressed CI configurations:
- tcwg_bmk_llvm_apm/llvm-release-arm-spec2k6-Os
Culprit:
<cut>
commit b9bb6a5e12cae44a1cbf298b69f28fc6871f81c8
Author: Jakub Jelinek <jakub(a)redhat.com>
Date: Tue Aug 11 16:46:49 2020 +0200
c-family: Fix ICE in get_atomic_generic_size [PR96545]
As the testcase shows, we would ICE if the type of the first argument of
various atomic builtins was pointer to (non-void) incomplete type, we would
assume that TYPE_SIZE_UNIT must be non-NULL. This patch diagnoses it
instead. And also changes the TREE_CODE != INTEGER_CST check to
!tree_fits_uhwi_p, as we use tree_to_uhwi after this and at least in theory
the int could be too large and not fit.
2020-08-11 Jakub Jelinek <jakub(a)redhat.com>
PR c/96545
* c-common.c (get_atomic_generic_size): Require that first argument's
type points to a complete type and use tree_fits_uhwi_p instead of
just INTEGER_CST TREE_CODE check for the TYPE_SIZE_UNIT.
* c-c++-common/pr96545.c: New test.
(cherry picked from commit 7840b4dc05539cf5575b3e9ff57ff5f6c3da2cae)
</cut>
Results regressed to (for first_bad == b9bb6a5e12cae44a1cbf298b69f28fc6871f81c8)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--with-mode=thumb --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--with-mode=thumb --set gcc_override_configure=--disable-libsanitizer:
-5
# build_llvm true:
-3
# true:
0
# benchmark -- -Os_mthumb artifacts/build-b9bb6a5e12cae44a1cbf298b69f28fc6871f81c8/results_id:
1
# 429.mcf,mcf_base.default regressed by 104
# 470.lbm,lbm_base.default regressed by 103
from (for last_good == db00336a49707327552e678b59da8e85384bdae6)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--with-mode=thumb --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--with-mode=thumb --set gcc_override_configure=--disable-libsanitizer:
-5
# build_llvm true:
-3
# true:
0
# benchmark -- -Os_mthumb artifacts/build-db00336a49707327552e678b59da8e85384bdae6/results_id:
1
Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-release…
Results ID of last_good: apm_32/tcwg_bmk_llvm_apm/bisect-llvm-release-arm-spec2k6-Os/3201
Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-release…
Results ID of first_bad: apm_32/tcwg_bmk_llvm_apm/bisect-llvm-release-arm-spec2k6-Os/3141
Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-release…
Configuration details:
Reproduce builds:
<cut>
mkdir investigate-gcc-b9bb6a5e12cae44a1cbf298b69f28fc6871f81c8
cd investigate-gcc-b9bb6a5e12cae44a1cbf298b69f28fc6871f81c8
git clone https://git.linaro.org/toolchain/jenkins-scripts
mkdir -p artifacts/manifests
curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-release… --fail
curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-release… --fail
curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-release… --fail
chmod +x artifacts/test.sh
# Reproduce the baseline build (build all pre-requisites)
./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh
# Save baseline build state (which is then restored in artifacts/test.sh)
mkdir -p ./bisect
rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /gcc/ ./ ./bisect/baseline/
cd gcc
# Reproduce first_bad build
git checkout --detach b9bb6a5e12cae44a1cbf298b69f28fc6871f81c8
../artifacts/test.sh
# Reproduce last_good build
git checkout --detach db00336a49707327552e678b59da8e85384bdae6
../artifacts/test.sh
cd ..
</cut>
History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/…
Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-release…
Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-release…
Full commit (up to 1000 lines):
<cut>
commit b9bb6a5e12cae44a1cbf298b69f28fc6871f81c8
Author: Jakub Jelinek <jakub(a)redhat.com>
Date: Tue Aug 11 16:46:49 2020 +0200
c-family: Fix ICE in get_atomic_generic_size [PR96545]
As the testcase shows, we would ICE if the type of the first argument of
various atomic builtins was pointer to (non-void) incomplete type, we would
assume that TYPE_SIZE_UNIT must be non-NULL. This patch diagnoses it
instead. And also changes the TREE_CODE != INTEGER_CST check to
!tree_fits_uhwi_p, as we use tree_to_uhwi after this and at least in theory
the int could be too large and not fit.
2020-08-11 Jakub Jelinek <jakub(a)redhat.com>
PR c/96545
* c-common.c (get_atomic_generic_size): Require that first argument's
type points to a complete type and use tree_fits_uhwi_p instead of
just INTEGER_CST TREE_CODE check for the TYPE_SIZE_UNIT.
* c-c++-common/pr96545.c: New test.
(cherry picked from commit 7840b4dc05539cf5575b3e9ff57ff5f6c3da2cae)
---
gcc/c-family/c-common.c | 9 ++++++++-
gcc/testsuite/c-c++-common/pr96545.c | 31 +++++++++++++++++++++++++++++++
2 files changed, 39 insertions(+), 1 deletion(-)
diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 20258c331af..b6eb40c8122 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -6948,8 +6948,15 @@ get_atomic_generic_size (location_t loc, tree function,
return 0;
}
+ if (!COMPLETE_TYPE_P (TREE_TYPE (type_0)))
+ {
+ error_at (loc, "argument 1 of %qE must be a pointer to a complete type",
+ function);
+ return 0;
+ }
+
/* Types must be compile time constant sizes. */
- if (TREE_CODE ((TYPE_SIZE_UNIT (TREE_TYPE (type_0)))) != INTEGER_CST)
+ if (!tree_fits_uhwi_p ((TYPE_SIZE_UNIT (TREE_TYPE (type_0)))))
{
error_at (loc,
"argument 1 of %qE must be a pointer to a constant size type",
diff --git a/gcc/testsuite/c-c++-common/pr96545.c b/gcc/testsuite/c-c++-common/pr96545.c
new file mode 100644
index 00000000000..bc6b0cf345c
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/pr96545.c
@@ -0,0 +1,31 @@
+/* PR c/96545 */
+/* { dg-do compile } */
+
+extern char x[], y[], z[];
+struct S;
+extern struct S s, t, u;
+int v, w;
+
+void
+foo (void)
+{
+ __atomic_exchange (&x, &y, &z, 0); /* { dg-error "must be a pointer to a complete type" } */
+}
+
+void
+bar (void)
+{
+ __atomic_exchange (&s, &t, &u, 0); /* { dg-error "must be a pointer to a complete type" } */
+}
+
+void
+baz (void)
+{
+ __atomic_exchange (&v, &t, &w, 0); /* { dg-error "size mismatch in argument 2 of" } */
+}
+
+void
+qux (void)
+{
+ __atomic_exchange (&v, &w, &t, 0); /* { dg-error "size mismatch in argument 3 of" } */
+}
</cut>
Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O3. So far, this commit has regressed CI configurations:
- tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O3
Culprit:
<cut>
commit f1ab60e40d16970381a003e145be6d5932823597
Author: Tomasz Kamiński <tomasz.kaminski(a)sonarsource.com>
Date: Thu Jul 29 10:55:24 2021 +0200
Fix FindZ3.cmake to support static libraries and Windows
Use absolute path to link z3 to allow builds both on windows and linux
since the library name is platform dependent for Z3 (libz3 on Windows
and z3 on Linux) and MSVC does not recognized -L and -l options.
Fix CMAKE_CROSSCOMPILING that does not work correctly since it uses
Z3_BUILD_VERSION instead of Z3_BUILD_NUMBER
Fix building with the static version of z3 library (supersedes D80227).
- Build the Z3 version detection code as C++, since the static
library brings in libstdc++ symbols
- Detect threading support and link against threading, in the
(likely) case Z3 was built with threads
Exposed compilation error from building a program that is used to detect
z3 version in the warning message, to simplify troubleshooting.
Reviewed By: JDevlieghere
Differential Revision: https://reviews.llvm.org/D106131
</cut>
Results regressed to (for first_bad == f1ab60e40d16970381a003e145be6d5932823597)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
-5
# build_llvm true:
-3
# true:
0
# benchmark -- -O3 artifacts/build-f1ab60e40d16970381a003e145be6d5932823597/results_id:
1
# 464.h264ref,h264ref_base.default regressed by 106
# 464.h264ref,[.] FastFullPelBlockMotionSearch regressed by 131
from (for last_good == 2df8bf9339e43de63d8d28e07182e1d6d7ffb843)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
-5
# build_llvm true:
-3
# true:
0
# benchmark -- -O3 artifacts/build-2df8bf9339e43de63d8d28e07182e1d6d7ffb843/results_id:
1
Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-…
Results ID of last_good: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O3/3087
Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-…
Results ID of first_bad: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O3/3058
Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-…
Configuration details:
Reproduce builds:
<cut>
mkdir investigate-llvm-f1ab60e40d16970381a003e145be6d5932823597
cd investigate-llvm-f1ab60e40d16970381a003e145be6d5932823597
git clone https://git.linaro.org/toolchain/jenkins-scripts
mkdir -p artifacts/manifests
curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail
curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail
curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail
chmod +x artifacts/test.sh
# Reproduce the baseline build (build all pre-requisites)
./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh
# Save baseline build state (which is then restored in artifacts/test.sh)
mkdir -p ./bisect
rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/
cd llvm
# Reproduce first_bad build
git checkout --detach f1ab60e40d16970381a003e145be6d5932823597
../artifacts/test.sh
# Reproduce last_good build
git checkout --detach 2df8bf9339e43de63d8d28e07182e1d6d7ffb843
../artifacts/test.sh
cd ..
</cut>
History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/…
Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-…
Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-…
Full commit (up to 1000 lines):
<cut>
commit f1ab60e40d16970381a003e145be6d5932823597
Author: Tomasz Kamiński <tomasz.kaminski(a)sonarsource.com>
Date: Thu Jul 29 10:55:24 2021 +0200
Fix FindZ3.cmake to support static libraries and Windows
Use absolute path to link z3 to allow builds both on windows and linux
since the library name is platform dependent for Z3 (libz3 on Windows
and z3 on Linux) and MSVC does not recognized -L and -l options.
Fix CMAKE_CROSSCOMPILING that does not work correctly since it uses
Z3_BUILD_VERSION instead of Z3_BUILD_NUMBER
Fix building with the static version of z3 library (supersedes D80227).
- Build the Z3 version detection code as C++, since the static
library brings in libstdc++ symbols
- Detect threading support and link against threading, in the
(likely) case Z3 was built with threads
Exposed compilation error from building a program that is used to detect
z3 version in the warning message, to simplify troubleshooting.
Reviewed By: JDevlieghere
Differential Revision: https://reviews.llvm.org/D106131
---
llvm/cmake/modules/FindZ3.cmake | 29 ++++++++++++++++++++++-------
1 file changed, 22 insertions(+), 7 deletions(-)
diff --git a/llvm/cmake/modules/FindZ3.cmake b/llvm/cmake/modules/FindZ3.cmake
index 95dd37789a87..118b1eac3b32 100644
--- a/llvm/cmake/modules/FindZ3.cmake
+++ b/llvm/cmake/modules/FindZ3.cmake
@@ -2,8 +2,21 @@ INCLUDE(CheckCXXSourceRuns)
# Function to check Z3's version
function(check_z3_version z3_include z3_lib)
+ # Get lib path
+ set(z3_link_libs "${z3_lib}")
+
+ # Try to find a threading module in case Z3 was built with threading support.
+ # Threads are required elsewhere in LLVM, but not marked as required here because
+ # Z3 could have been compiled without threading support.
+ find_package(Threads)
+ # CMAKE_THREAD_LIBS_INIT may be empty if the thread functions are provided by the
+ # system libraries and no special flags are needed.
+ if(CMAKE_THREAD_LIBS_INIT)
+ list(APPEND z3_link_libs "${CMAKE_THREAD_LIBS_INIT}")
+ endif()
+
# The program that will be executed to print Z3's version.
- file(WRITE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/testz3.c
+ file(WRITE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/testz3.cpp
"#include <assert.h>
#include <z3.h>
int main() {
@@ -13,16 +26,14 @@ function(check_z3_version z3_include z3_lib)
return 0;
}")
- # Get lib path
- get_filename_component(z3_lib_path ${z3_lib} PATH)
-
try_run(
Z3_RETURNCODE
Z3_COMPILED
${CMAKE_BINARY_DIR}
- ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/testz3.c
+ ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/testz3.cpp
COMPILE_DEFINITIONS -I"${z3_include}"
- LINK_LIBRARIES -L${z3_lib_path} -lz3
+ LINK_LIBRARIES ${z3_link_libs}
+ COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT
RUN_OUTPUT_VARIABLE SRC_OUTPUT
)
@@ -30,6 +41,9 @@ function(check_z3_version z3_include z3_lib)
string(REGEX REPLACE "([0-9]*\\.[0-9]*\\.[0-9]*)" "\\1"
z3_version "${SRC_OUTPUT}")
set(Z3_VERSION_STRING ${z3_version} PARENT_SCOPE)
+ else()
+ message(NOTICE "${COMPILE_OUTPUT}")
+ message(WARNING "Failed to compile Z3 program that is used to determine library version.")
endif()
endfunction(check_z3_version)
@@ -86,7 +100,7 @@ if(NOT Z3_VERSION_STRING AND (CMAKE_CROSSCOMPILING AND
file(STRINGS "${Z3_INCLUDE_DIR}/z3_version.h"
z3_version_str REGEX "^#define[\t ]+Z3_BUILD_NUMBER[\t ]+.*")
- string(REGEX REPLACE "^.*Z3_BUILD_VERSION[\t ]+([0-9]).*$" "\\1"
+ string(REGEX REPLACE "^.*Z3_BUILD_NUMBER[\t ]+([0-9]).*$" "\\1"
Z3_BUILD "${z3_version_str}")
set(Z3_VERSION_STRING ${Z3_MAJOR}.${Z3_MINOR}.${Z3_BUILD})
@@ -98,6 +112,7 @@ if(NOT Z3_VERSION_STRING)
# conservative and force the found version to 0.0.0 to make version
# checks always fail.
set(Z3_VERSION_STRING "0.0.0")
+ message(WARNING "Failed to determine Z3 library version, defaulting to 0.0.0.")
endif()
# handle the QUIETLY and REQUIRED arguments and set Z3_FOUND to TRUE if
</cut>
Progress:
* UM-2 [QEMU upstream maintainership]
+ Usual release work (rc2 now out) and code review
+ Continuing with systick timer refactoring. This has turned out a bit
more complicated than I expected: had to do a preliminary refactor
to move some stuff out of the NVIC device into the armv7m container;
also needed to add support in the Clock APIs for frequency multiply
and divide for the benefit of the stm32 SoCs which drive the systick
reference clock at 1/8 the speed of the main CPU clock
-- PMM
Successfully identified regression in *gcc* in CI configuration tcwg_bmk_gnu_tx1/gnu-release-aarch64-spec2k6-O2. So far, this commit has regressed CI configurations:
- tcwg_bmk_gnu_tx1/gnu-release-aarch64-spec2k6-O2
Culprit:
<cut>
commit 6ff0cdebb1bc281ba2374f3ecdbe358c4fa74093
Author: Richard Sandiford <richard.sandiford(a)arm.com>
Date: Thu Oct 31 17:16:31 2019 +0000
[AArch64] Fix build for non-default languages
The SVE PCS support broke go, D and Ada because those languages don't
call TARGET_INIT_BUILTINS. We therefore ended up trying to get the
TYPE_MAIN_VARIANT of a null __SVBool_t.
We shouldn't really need to apply TYPE_MAIN_VARIANT there anyway,
since the ABI-defined types are (and need to be) their own main
variants. This patch asserts for that instead.
2019-10-31 Richard Sandiford <richard.sandiford(a)arm.com>
gcc/
* config/aarch64/aarch64-sve-builtins.cc (register_builtin_types):
Assert that the type we store in abi_vector_types is its own
main variant.
(svbool_type_p): Don't apply TYPE_MAIN_VARIANT here.
From-SVN: r277680
</cut>
Results regressed to (for first_bad == 6ff0cdebb1bc281ba2374f3ecdbe358c4fa74093)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
-5
# true:
0
# benchmark -- -O2 artifacts/build-6ff0cdebb1bc281ba2374f3ecdbe358c4fa74093/results_id:
1
# 458.sjeng,[.] setup_attackers regressed by 111
# 458.sjeng,[.] search regressed by 215
from (for last_good == aaa80941e042d18dcd5add6e7bb28cb392767a39)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
-5
# true:
0
# benchmark -- -O2 artifacts/build-aaa80941e042d18dcd5add6e7bb28cb392767a39/results_id:
1
Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a…
Results ID of last_good: tx1_64/tcwg_bmk_gnu_tx1/bisect-gnu-release-aarch64-spec2k6-O2/2854
Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a…
Results ID of first_bad: tx1_64/tcwg_bmk_gnu_tx1/bisect-gnu-release-aarch64-spec2k6-O2/2851
Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a…
Configuration details:
Reproduce builds:
<cut>
mkdir investigate-gcc-6ff0cdebb1bc281ba2374f3ecdbe358c4fa74093
cd investigate-gcc-6ff0cdebb1bc281ba2374f3ecdbe358c4fa74093
git clone https://git.linaro.org/toolchain/jenkins-scripts
mkdir -p artifacts/manifests
curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… --fail
curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… --fail
curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… --fail
chmod +x artifacts/test.sh
# Reproduce the baseline build (build all pre-requisites)
./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh
# Save baseline build state (which is then restored in artifacts/test.sh)
mkdir -p ./bisect
rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /gcc/ ./ ./bisect/baseline/
cd gcc
# Reproduce first_bad build
git checkout --detach 6ff0cdebb1bc281ba2374f3ecdbe358c4fa74093
../artifacts/test.sh
# Reproduce last_good build
git checkout --detach aaa80941e042d18dcd5add6e7bb28cb392767a39
../artifacts/test.sh
cd ..
</cut>
History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/…
Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a…
Build log: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a…
Full commit (up to 1000 lines):
<cut>
commit 6ff0cdebb1bc281ba2374f3ecdbe358c4fa74093
Author: Richard Sandiford <richard.sandiford(a)arm.com>
Date: Thu Oct 31 17:16:31 2019 +0000
[AArch64] Fix build for non-default languages
The SVE PCS support broke go, D and Ada because those languages don't
call TARGET_INIT_BUILTINS. We therefore ended up trying to get the
TYPE_MAIN_VARIANT of a null __SVBool_t.
We shouldn't really need to apply TYPE_MAIN_VARIANT there anyway,
since the ABI-defined types are (and need to be) their own main
variants. This patch asserts for that instead.
2019-10-31 Richard Sandiford <richard.sandiford(a)arm.com>
gcc/
* config/aarch64/aarch64-sve-builtins.cc (register_builtin_types):
Assert that the type we store in abi_vector_types is its own
main variant.
(svbool_type_p): Don't apply TYPE_MAIN_VARIANT here.
From-SVN: r277680
---
gcc/ChangeLog | 7 +++++++
gcc/config/aarch64/aarch64-sve-builtins.cc | 4 ++--
2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 66b7a142251..affa74cdd25 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2019-10-31 Richard Sandiford <richard.sandiford(a)arm.com>
+
+ * config/aarch64/aarch64-sve-builtins.cc (register_builtin_types):
+ Assert that the type we store in abi_vector_types is its own
+ main variant.
+ (svbool_type_p): Don't apply TYPE_MAIN_VARIANT here.
+
2019-10-31 Richard Earnshaw <rearnsha(a)arm.com>
* config/arm/arm.c (arm_legitimize_address): Don't form negative offsets
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 70d7b1a165d..424f64adfef 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -2993,6 +2993,7 @@ register_builtin_types ()
BITS_PER_SVE_VECTOR));
}
vectype = build_distinct_type_copy (vectype);
+ gcc_assert (vectype == TYPE_MAIN_VARIANT (vectype));
SET_TYPE_STRUCTURAL_EQUALITY (vectype);
TYPE_ARTIFICIAL (vectype) = 1;
abi_vector_types[i] = vectype;
@@ -3235,8 +3236,7 @@ bool
svbool_type_p (const_tree type)
{
tree abi_type = abi_vector_types[VECTOR_TYPE_svbool_t];
- return (type != error_mark_node
- && TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (abi_type));
+ return type != error_mark_node && TYPE_MAIN_VARIANT (type) == abi_type;
}
/* If TYPE is a built-in type defined by the SVE ABI, return the mangled name,
</cut>
Successfully identified regression in *gcc* in CI configuration tcwg_gnu/gnu-master-arm-check_gcc. So far, this commit has regressed CI configurations:
- tcwg_gnu/gnu-master-arm-check_gcc
Culprit:
<cut>
commit 34dbb5f346459a1b36cd0cfbfe1cf18cd099fdf3
Author: Marek Polacek <polacek(a)redhat.com>
Date: Tue Jul 20 16:26:28 2021 -0400
include: Fix -Wundef warnings in ansidecl.h
This quashes -Wundef warnings in ansidecl.h when compiled in C or C++.
In C, __cpp_constexpr and __cplusplus aren't defined so we evaluate
them to 0; conversely, __STDC_VERSION__ is not defined in C++.
This has caused grief when -Wundef is used with -Werror.
I've also tested -traditional-cpp.
include/ChangeLog:
* ansidecl.h: Check if __cplusplus is defined before checking
the value of __cpp_constexpr and __cplusplus. Don't check
__STDC_VERSION__ in C++.
</cut>
Results regressed to (for first_bad == 34dbb5f346459a1b36cd0cfbfe1cf18cd099fdf3)
# reset_artifacts:
-10
# build_abe binutils:
-2
# build_abe gcc:
-1
# build_abe dejagnu:
0
# build_abe check_gcc -- --set runtestflags=gcc.c-torture/execute/execute.exp:
1
# Getting actual results from build directory /home/tcwg-buildslave/workspace/tcwg_gnu_0/artifacts/build-34dbb5f346459a1b36cd0cfbfe1cf18cd099fdf3/sumfiles
# /home/tcwg-buildslave/workspace/tcwg_gnu_0/artifacts/build-34dbb5f346459a1b36cd0cfbfe1cf18cd099fdf3/sumfiles/libstdc++.sum
# /home/tcwg-buildslave/workspace/tcwg_gnu_0/artifacts/build-34dbb5f346459a1b36cd0cfbfe1cf18cd099fdf3/sumfiles/gfortran.sum
# /home/tcwg-buildslave/workspace/tcwg_gnu_0/artifacts/build-34dbb5f346459a1b36cd0cfbfe1cf18cd099fdf3/sumfiles/libitm.sum
# /home/tcwg-buildslave/workspace/tcwg_gnu_0/artifacts/build-34dbb5f346459a1b36cd0cfbfe1cf18cd099fdf3/sumfiles/libgomp.sum
# /home/tcwg-buildslave/workspace/tcwg_gnu_0/artifacts/build-34dbb5f346459a1b36cd0cfbfe1cf18cd099fdf3/sumfiles/libatomic.sum
# /home/tcwg-buildslave/workspace/tcwg_gnu_0/artifacts/build-34dbb5f346459a1b36cd0cfbfe1cf18cd099fdf3/sumfiles/g++.sum
# /home/tcwg-buildslave/workspace/tcwg_gnu_0/artifacts/build-34dbb5f346459a1b36cd0cfbfe1cf18cd099fdf3/sumfiles/gcc.sum
# Manifest: gcc-compare-results/contrib/testsuite-management/flaky/gnu-master-arm-check_gcc.xfail
# Getting actual results from build directory base-artifacts/sumfiles
# base-artifacts/sumfiles/libstdc++.sum
# base-artifacts/sumfiles/gfortran.sum
# base-artifacts/sumfiles/libitm.sum
# base-artifacts/sumfiles/libgomp.sum
# base-artifacts/sumfiles/libatomic.sum
# base-artifacts/sumfiles/g++.sum
# base-artifacts/sumfiles/gcc.sum
#
#
# Unexpected results in this build (new failures)
# === gcc tests ===
#
# Running gcc.c-torture/execute/execute.exp ...
# FAIL: gcc.c-torture/execute/20030117-1.c -Os execution test
# FAIL: gcc.c-torture/execute/20031215-1.c -O1 execution test
#
# === Results Summary ===
from (for last_good == ead235f60139edc6eb408d8d083cbb15e417b447)
# reset_artifacts:
-10
# build_abe binutils:
-2
# build_abe gcc:
-1
# build_abe dejagnu:
0
# build_abe check_gcc -- --set runtestflags=gcc.c-torture/execute/execute.exp:
1
Artifacts of last_good build: https://ci.linaro.org/job/tcwg_gcc-bisect-gnu-master-arm-check_gcc/2/artifa…
Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_gcc-bisect-gnu-master-arm-check_gcc/2/artifa…
Build top page/logs: https://ci.linaro.org/job/tcwg_gcc-bisect-gnu-master-arm-check_gcc/2/
Configuration details:
Reproduce builds:
<cut>
mkdir investigate-gcc-34dbb5f346459a1b36cd0cfbfe1cf18cd099fdf3
cd investigate-gcc-34dbb5f346459a1b36cd0cfbfe1cf18cd099fdf3
git clone https://git.linaro.org/toolchain/jenkins-scripts
mkdir -p artifacts/manifests
curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_gcc-bisect-gnu-master-arm-check_gcc/2/artifa… --fail
curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_gcc-bisect-gnu-master-arm-check_gcc/2/artifa… --fail
curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_gcc-bisect-gnu-master-arm-check_gcc/2/artifa… --fail
chmod +x artifacts/test.sh
# Reproduce the baseline build (build all pre-requisites)
./jenkins-scripts/tcwg_gnu-build.sh @@ artifacts/manifests/build-baseline.sh
# Save baseline build state (which is then restored in artifacts/test.sh)
mkdir -p ./bisect
rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /gcc/ ./ ./bisect/baseline/
cd gcc
# Reproduce first_bad build
git checkout --detach 34dbb5f346459a1b36cd0cfbfe1cf18cd099fdf3
../artifacts/test.sh
# Reproduce last_good build
git checkout --detach ead235f60139edc6eb408d8d083cbb15e417b447
../artifacts/test.sh
cd ..
</cut>
History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/…
Artifacts: https://ci.linaro.org/job/tcwg_gcc-bisect-gnu-master-arm-check_gcc/2/artifa…
Build log: https://ci.linaro.org/job/tcwg_gcc-bisect-gnu-master-arm-check_gcc/2/consol…
Full commit (up to 1000 lines):
<cut>
commit 34dbb5f346459a1b36cd0cfbfe1cf18cd099fdf3
Author: Marek Polacek <polacek(a)redhat.com>
Date: Tue Jul 20 16:26:28 2021 -0400
include: Fix -Wundef warnings in ansidecl.h
This quashes -Wundef warnings in ansidecl.h when compiled in C or C++.
In C, __cpp_constexpr and __cplusplus aren't defined so we evaluate
them to 0; conversely, __STDC_VERSION__ is not defined in C++.
This has caused grief when -Wundef is used with -Werror.
I've also tested -traditional-cpp.
include/ChangeLog:
* ansidecl.h: Check if __cplusplus is defined before checking
the value of __cpp_constexpr and __cplusplus. Don't check
__STDC_VERSION__ in C++.
---
include/ansidecl.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/include/ansidecl.h b/include/ansidecl.h
index 0515228f325..2efe3e85e59 100644
--- a/include/ansidecl.h
+++ b/include/ansidecl.h
@@ -79,7 +79,7 @@ So instead we use the macro below and test it against specific values. */
/* inline requires special treatment; it's in C99, and GCC >=2.7 supports
it too, but it's not in C89. */
#undef inline
-#if __STDC_VERSION__ >= 199901L || defined(__cplusplus) || (defined(__SUNPRO_C) && defined(__C99FEATURES__))
+#if (!defined(__cplusplus) && __STDC_VERSION__ >= 199901L) || defined(__cplusplus) || (defined(__SUNPRO_C) && defined(__C99FEATURES__))
/* it's a keyword */
#else
# if GCC_VERSION >= 2007
@@ -356,7 +356,7 @@ So instead we use the macro below and test it against specific values. */
#define ENUM_BITFIELD(TYPE) unsigned int
#endif
-#if __cpp_constexpr >= 200704
+#if defined(__cplusplus) && __cpp_constexpr >= 200704
#define CONSTEXPR constexpr
#else
#define CONSTEXPR
@@ -419,7 +419,7 @@ So instead we use the macro below and test it against specific values. */
so that most attempts at copy are caught at compile-time. */
-#if __cplusplus >= 201103
+#if defined(__cplusplus) && __cplusplus >= 201103
#define DISABLE_COPY_AND_ASSIGN(TYPE) \
TYPE (const TYPE&) = delete; \
void operator= (const TYPE &) = delete
</cut>
Progress:
* UM-2 [QEMU upstream maintainership]
+ collected up/reviewed arm patches ready for rc1
+ sent patch fixing a silly crash if the user tried to pass both a
guest kernel and a firmware blob to the raspi3 machine
+ patchset fixing a lot of places in our docs where we used `foo`
in rST markup but we meant ``foo``
+ some docs patches converting old .txt files to .rst so we
actually ship the docs to users
+ patchset doing a bit of cleanup on arch-init.c
+ patches implementing M-profile trap-on-division-by-zero
+ started to look at refactoring the M-profile systick timer to
no longer rely on an ugly global variable to specify the frequency
* QEMU-406 [QEMU support for MVE (M-profile Vector Extension; Helium)]
+ sent out MVE patchset for review which has full coverage of the
instruction set and includes "enable MVE on Cortex-M55"
+ remaining TODO items: fault on unaligned accesses; report MVE
registers via gdbstub; optimize codegen for the no-predication case
-- PMM
Successfully identified regression in *gcc* in CI configuration tcwg_bmk_gnu_tx1/gnu-master-aarch64-spec2k6-O2_LTO. So far, this commit has regressed CI configurations:
- tcwg_bmk_gnu_tx1/gnu-master-aarch64-spec2k6-O2_LTO
Culprit:
<cut>
commit fedcf3c476aff7533741a1c61071200f0a38cf83
Author: Richard Biener <rguenther(a)suse.de>
Date: Thu Jul 8 09:52:49 2021 +0200
tree-optimization/101373 - avoid PRE across externally throwing call
PRE already tries to avoid hoisting possibly trapping expressions
across calls that might not return normally but fails to consider
const calls that throw externally. The following fixes that and
also plugs the hole of trapping references not pruned in case
they are not caught by the actual call clobbering it.
At -Os we hit the same issue in RTL PRE and postreload-gcse has
even more incomplete checks so the patch adjusts both of those
as well.
2021-07-08 Richard Biener <rguenther(a)suse.de>
PR tree-optimization/101373
* tree-ssa-pre.c (prune_clobbered_mems): Also prune trapping
references when the BB may not return.
(compute_avail): Pass in the function we're working on and
replace cfun references with it. Externally throwing
const calls also possibly terminate the function.
(pass_pre::execute): Pass down the function we're working on.
* gcse.c (compute_hash_table_work): Externally throwing
const/pure calls also need record_last_mem_set_info.
* postreload-gcse.c (record_opr_changes): Looping or externally
throwing const/pure calls also need record_last_mem_set_info.
* g++.dg/torture/pr101373.C: New testcase, XFAILed.
* gnat.dg/opt95.adb: Likewise.
</cut>
Results regressed to (for first_bad == fedcf3c476aff7533741a1c61071200f0a38cf83)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
-5
# true:
0
# benchmark -O2_LTO -- artifacts/build-fedcf3c476aff7533741a1c61071200f0a38cf83/results_id:
1
# 429.mcf,mcf_base.default regressed by 106
from (for last_good == fe610051a803131822bd02a8842a67b573b8e46a)
# reset_artifacts:
-10
# build_abe binutils:
-9
# build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer:
-8
# build_abe linux:
-7
# build_abe glibc:
-6
# build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer:
-5
# true:
0
# benchmark -O2_LTO -- artifacts/build-fe610051a803131822bd02a8842a67b573b8e46a/results_id:
1
Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-master-aa…
Results ID of last_good: tx1_64/tcwg_bmk_gnu_tx1/bisect-gnu-master-aarch64-spec2k6-O2_LTO/2345
Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-master-aa…
Results ID of first_bad: tx1_64/tcwg_bmk_gnu_tx1/bisect-gnu-master-aarch64-spec2k6-O2_LTO/2337
Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-master-aa…
Configuration details:
Reproduce builds:
<cut>
mkdir investigate-gcc-fedcf3c476aff7533741a1c61071200f0a38cf83
cd investigate-gcc-fedcf3c476aff7533741a1c61071200f0a38cf83
git clone https://git.linaro.org/toolchain/jenkins-scripts
mkdir -p artifacts/manifests
curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-master-aa… --fail
curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-master-aa… --fail
curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-master-aa… --fail
chmod +x artifacts/test.sh
# Reproduce the baseline build (build all pre-requisites)
./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh
# Save baseline build state (which is then restored in artifacts/test.sh)
mkdir -p ./bisect
rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /gcc/ ./ ./bisect/baseline/
cd gcc
# Reproduce first_bad build
git checkout --detach fedcf3c476aff7533741a1c61071200f0a38cf83
../artifacts/test.sh
# Reproduce last_good build
git checkout --detach fe610051a803131822bd02a8842a67b573b8e46a
../artifacts/test.sh
cd ..
</cut>
History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/…
Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-master-aa…
Build log: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-master-aa…
Full commit (up to 1000 lines):
<cut>
commit fedcf3c476aff7533741a1c61071200f0a38cf83
Author: Richard Biener <rguenther(a)suse.de>
Date: Thu Jul 8 09:52:49 2021 +0200
tree-optimization/101373 - avoid PRE across externally throwing call
PRE already tries to avoid hoisting possibly trapping expressions
across calls that might not return normally but fails to consider
const calls that throw externally. The following fixes that and
also plugs the hole of trapping references not pruned in case
they are not caught by the actual call clobbering it.
At -Os we hit the same issue in RTL PRE and postreload-gcse has
even more incomplete checks so the patch adjusts both of those
as well.
2021-07-08 Richard Biener <rguenther(a)suse.de>
PR tree-optimization/101373
* tree-ssa-pre.c (prune_clobbered_mems): Also prune trapping
references when the BB may not return.
(compute_avail): Pass in the function we're working on and
replace cfun references with it. Externally throwing
const calls also possibly terminate the function.
(pass_pre::execute): Pass down the function we're working on.
* gcse.c (compute_hash_table_work): Externally throwing
const/pure calls also need record_last_mem_set_info.
* postreload-gcse.c (record_opr_changes): Looping or externally
throwing const/pure calls also need record_last_mem_set_info.
* g++.dg/torture/pr101373.C: New testcase, XFAILed.
* gnat.dg/opt95.adb: Likewise.
---
gcc/gcse.c | 3 ++-
gcc/postreload-gcse.c | 4 +++-
gcc/testsuite/g++.dg/torture/pr101373.C | 33 +++++++++++++++++++++++++++
gcc/testsuite/gnat.dg/opt95.adb | 40 +++++++++++++++++++++++++++++++++
gcc/tree-ssa-pre.c | 34 +++++++++++++++++-----------
5 files changed, 99 insertions(+), 15 deletions(-)
diff --git a/gcc/gcse.c b/gcc/gcse.c
index ecf7e51aac5..ccd33664af5 100644
--- a/gcc/gcse.c
+++ b/gcc/gcse.c
@@ -1537,7 +1537,8 @@ compute_hash_table_work (struct gcse_hash_table_d *table)
record_last_reg_set_info (insn, regno);
if (! RTL_CONST_OR_PURE_CALL_P (insn)
- || RTL_LOOPING_CONST_OR_PURE_CALL_P (insn))
+ || RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
+ || can_throw_external (insn))
record_last_mem_set_info (insn);
}
diff --git a/gcc/postreload-gcse.c b/gcc/postreload-gcse.c
index 0b28247e299..6c95d09a1e5 100644
--- a/gcc/postreload-gcse.c
+++ b/gcc/postreload-gcse.c
@@ -779,7 +779,9 @@ record_opr_changes (rtx_insn *insn)
EXECUTE_IF_SET_IN_HARD_REG_SET (callee_clobbers, 0, regno, hrsi)
record_last_reg_set_info_regno (insn, regno);
- if (! RTL_CONST_OR_PURE_CALL_P (insn))
+ if (! RTL_CONST_OR_PURE_CALL_P (insn)
+ || RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
+ || can_throw_external (insn))
record_last_mem_set_info (insn);
}
}
diff --git a/gcc/testsuite/g++.dg/torture/pr101373.C b/gcc/testsuite/g++.dg/torture/pr101373.C
new file mode 100644
index 00000000000..f8c809739e2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr101373.C
@@ -0,0 +1,33 @@
+// { dg-do run }
+// { dg-xfail-run-if "PR100409" { *-*-* } }
+
+int __attribute__((const,noipa)) foo (int j)
+{
+ if (j != 0)
+ throw 1;
+ return 0;
+}
+
+int __attribute__((noipa)) bar (int *p, int n)
+{
+ int ret = 0;
+ if (n)
+ {
+ foo (n);
+ ret = *p;
+ }
+ ret += *p;
+ return ret;
+}
+
+int main()
+{
+ try
+ {
+ return bar (nullptr, 1);
+ }
+ catch (...)
+ {
+ return 0;
+ }
+}
diff --git a/gcc/testsuite/gnat.dg/opt95.adb b/gcc/testsuite/gnat.dg/opt95.adb
new file mode 100644
index 00000000000..2c72582b3f1
--- /dev/null
+++ b/gcc/testsuite/gnat.dg/opt95.adb
@@ -0,0 +1,40 @@
+-- { dg-do run }
+-- { dg-options "-O2 -gnatp" }
+
+procedure Opt95 is
+
+ function Foo (J : Integer) return Integer;
+ pragma Pure_Function (Foo);
+ pragma Machine_Attribute (Foo, "noipa");
+
+ function Foo (J : Integer) return Integer is
+ begin
+ if J /= 0 then
+ raise Constraint_Error;
+ end if;
+ return 0;
+ end;
+
+ function Bar (A : access Integer; N : Integer) return Integer;
+ pragma Machine_Attribute (Bar, "noipa");
+
+ function Bar (A : access Integer; N : Integer) return Integer is
+ Ret : Integer := 0;
+ Ret2 : Integer := 0;
+ begin
+ if N /= 0 then
+ Ret2 := Foo (N);
+ Ret := A.all;
+ end if;
+ Ret := Ret + A.all;
+ return Ret + Ret2;
+ end;
+
+ V : Integer;
+ pragma Volatile (V);
+
+begin
+ V := Bar (null, 1);
+exception
+ when Constraint_Error => null;
+end;
diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c
index 69141c2f0c9..aa5244e678c 100644
--- a/gcc/tree-ssa-pre.c
+++ b/gcc/tree-ssa-pre.c
@@ -2071,6 +2071,13 @@ prune_clobbered_mems (bitmap_set_t set, basic_block block)
&& value_dies_in_block_x (expr, block))))
to_remove = i;
}
+ /* If the REFERENCE may trap make sure the block does not contain
+ a possible exit point.
+ ??? This is overly conservative if we translate AVAIL_OUT
+ as the available expression might be after the exit point. */
+ if (BB_MAY_NOTRETURN (block)
+ && vn_reference_may_trap (ref))
+ to_remove = i;
}
else if (expr->kind == NARY)
{
@@ -3860,7 +3867,7 @@ insert (void)
AVAIL_OUT[BLOCK] = AVAIL_IN[BLOCK] U PHI_GEN[BLOCK] U TMP_GEN[BLOCK]. */
static void
-compute_avail (void)
+compute_avail (function *fun)
{
basic_block block, son;
@@ -3871,7 +3878,7 @@ compute_avail (void)
/* We pretend that default definitions are defined in the entry block.
This includes function arguments and the static chain decl. */
- FOR_EACH_SSA_NAME (i, name, cfun)
+ FOR_EACH_SSA_NAME (i, name, fun)
{
pre_expr e;
if (!SSA_NAME_IS_DEFAULT_DEF (name)
@@ -3881,31 +3888,31 @@ compute_avail (void)
e = get_or_alloc_expr_for_name (name);
add_to_value (get_expr_value_id (e), e);
- bitmap_insert_into_set (TMP_GEN (ENTRY_BLOCK_PTR_FOR_FN (cfun)), e);
- bitmap_value_insert_into_set (AVAIL_OUT (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
+ bitmap_insert_into_set (TMP_GEN (ENTRY_BLOCK_PTR_FOR_FN (fun)), e);
+ bitmap_value_insert_into_set (AVAIL_OUT (ENTRY_BLOCK_PTR_FOR_FN (fun)),
e);
}
if (dump_file && (dump_flags & TDF_DETAILS))
{
- print_bitmap_set (dump_file, TMP_GEN (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
+ print_bitmap_set (dump_file, TMP_GEN (ENTRY_BLOCK_PTR_FOR_FN (fun)),
"tmp_gen", ENTRY_BLOCK);
- print_bitmap_set (dump_file, AVAIL_OUT (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
+ print_bitmap_set (dump_file, AVAIL_OUT (ENTRY_BLOCK_PTR_FOR_FN (fun)),
"avail_out", ENTRY_BLOCK);
}
/* Allocate the worklist. */
- worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
+ worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
/* Seed the algorithm by putting the dominator children of the entry
block on the worklist. */
- for (son = first_dom_son (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
+ for (son = first_dom_son (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (fun));
son;
son = next_dom_son (CDI_DOMINATORS, son))
worklist[sp++] = son;
- BB_LIVE_VOP_ON_EXIT (ENTRY_BLOCK_PTR_FOR_FN (cfun))
- = ssa_default_def (cfun, gimple_vop (cfun));
+ BB_LIVE_VOP_ON_EXIT (ENTRY_BLOCK_PTR_FOR_FN (fun))
+ = ssa_default_def (fun, gimple_vop (fun));
/* Loop until the worklist is empty. */
while (sp)
@@ -3970,7 +3977,8 @@ compute_avail (void)
before it. */
int flags = gimple_call_flags (stmt);
if (!(flags & ECF_CONST)
- || (flags & ECF_LOOPING_CONST_OR_PURE))
+ || (flags & ECF_LOOPING_CONST_OR_PURE)
+ || stmt_can_throw_external (fun, stmt))
BB_MAY_NOTRETURN (block) = 1;
}
@@ -3987,7 +3995,7 @@ compute_avail (void)
BB_LIVE_VOP_ON_EXIT (block) = gimple_vdef (stmt);
if (gimple_has_side_effects (stmt)
- || stmt_could_throw_p (cfun, stmt)
+ || stmt_could_throw_p (fun, stmt)
|| is_gimple_debug (stmt))
continue;
@@ -4384,7 +4392,7 @@ pass_pre::execute (function *fun)
we require AVAIL. */
if (n_basic_blocks_for_fn (fun) < 4000)
{
- compute_avail ();
+ compute_avail (fun);
compute_antic ();
insert ();
}
</cut>