- linaro-toolchain - lists.linaro.org

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tk1/llvm-release-arm-spec2k6-O3_LTO - Build # 8 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O3_LTO. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O3_LTO Culprit: <cut> commit bb1e5399e4586239d6424f5eea5a9f06c52ebe9b Author: Vitaly Buka <vitalybuka(a)google.com> Date: Fri Apr 2 00:58:09 2021 -0700 [NFC][scudo] Inline some functions into ScudoPrimaryTest </cut> Results regressed to (for first_bad == bb1e5399e4586239d6424f5eea5a9f06c52ebe9b) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_LTO_marm artifacts/build-bb1e5399e4586239d6424f5eea5a9f06c52ebe9b/results_id: 1 # 462.libquantum,libquantum_base.default regressed by 105 from (for last_good == f343a730596b6b02039a91d71dc16c113d09cfe6) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_LTO_marm artifacts/build-baseline/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of last_good: tk1_32/tcwg_bmk_llvm_tk1/baseline-llvm-release-arm-spec2k6-O3_LTO/4201 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of first_bad: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O3_LTO/4234 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-bb1e5399e4586239d6424f5eea5a9f06c52ebe9b cd investigate-llvm-bb1e5399e4586239d6424f5eea5a9f06c52ebe9b git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach bb1e5399e4586239d6424f5eea5a9f06c52ebe9b ../artifacts/test.sh # Reproduce last_good build git checkout --detach f343a730596b6b02039a91d71dc16c113d09cfe6 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Full commit (up to 1000 lines): <cut> commit bb1e5399e4586239d6424f5eea5a9f06c52ebe9b Author: Vitaly Buka <vitalybuka(a)google.com> Date: Fri Apr 2 00:58:09 2021 -0700 [NFC][scudo] Inline some functions into ScudoPrimaryTest --- .../lib/scudo/standalone/tests/primary_test.cpp | 173 +++++++++------------ 1 file changed, 71 insertions(+), 102 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp index 07f3d6b77c17..2707985b5c72 100644 --- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp @@ -21,37 +21,6 @@ // 32-bit architectures. It's not something we want to encourage, but we still // should ensure the tests pass. -template <typename Primary> static void testPrimary() { - const scudo::uptr NumberOfAllocations = 32U; - auto Deleter = [](Primary *P) { - P->unmapTestOnly(); - delete P; - }; - std::unique_ptr<Primary, decltype(Deleter)> Allocator(new Primary, Deleter); - Allocator->init(/*ReleaseToOsInterval=*/-1); - typename Primary::CacheT Cache; - Cache.init(nullptr, Allocator.get()); - for (scudo::uptr I = 0; I <= 16U; I++) { - const scudo::uptr Size = 1UL << I; - if (!Primary::canAllocate(Size)) - continue; - const scudo::uptr ClassId = Primary::SizeClassMap::getClassIdBySize(Size); - void *Pointers[NumberOfAllocations]; - for (scudo::uptr J = 0; J < NumberOfAllocations; J++) { - void *P = Cache.allocate(ClassId); - memset(P, 'B', Size); - Pointers[J] = P; - } - for (scudo::uptr J = 0; J < NumberOfAllocations; J++) - Cache.deallocate(ClassId, Pointers[J]); - } - Cache.destroy(nullptr); - Allocator->releaseToOS(); - scudo::ScopedString Str(1024); - Allocator->getStats(&Str); - Str.output(); -} - struct TestConfig1 { static const scudo::uptr PrimaryRegionSizeLog = 18U; static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; @@ -84,13 +53,16 @@ struct Config : public BaseConfig { using SizeClassMap = SizeClassMapT; }; -template <typename BaseConfig, typename SizeClassMapT> struct MakeAllocator { - using Value = scudo::SizeClassAllocator64<Config<BaseConfig, SizeClassMapT>>; -}; - +template <typename BaseConfig, typename SizeClassMapT> +struct SizeClassAllocator + : public scudo::SizeClassAllocator64<Config<BaseConfig, SizeClassMapT>> {}; template <typename SizeClassMapT> -struct MakeAllocator<TestConfig1, SizeClassMapT> { - using Value = scudo::SizeClassAllocator32<Config<TestConfig1, SizeClassMapT>>; +struct SizeClassAllocator<TestConfig1, SizeClassMapT> + : public scudo::SizeClassAllocator32<Config<TestConfig1, SizeClassMapT>> {}; + +template <typename BaseConfig, typename SizeClassMapT> +struct TestAllocator : public SizeClassAllocator<BaseConfig, SizeClassMapT> { + ~TestAllocator() { this->unmapTestOnly(); } }; namespace testing { @@ -114,8 +86,31 @@ using ScudoPrimaryTestTypes = testing::Types< TYPED_TEST_CASE(ScudoPrimaryTest, ScudoPrimaryTestTypes); TYPED_TEST(ScudoPrimaryTest, BasicPrimary) { - using SizeClassMap = scudo::DefaultSizeClassMap; - testPrimary<typename MakeAllocator<TypeParam, SizeClassMap>::Value>(); + using Primary = TestAllocator<TypeParam, scudo::DefaultSizeClassMap>; + std::unique_ptr<Primary> Allocator(new Primary); + Allocator->init(/*ReleaseToOsInterval=*/-1); + typename Primary::CacheT Cache; + Cache.init(nullptr, Allocator.get()); + const scudo::uptr NumberOfAllocations = 32U; + for (scudo::uptr I = 0; I <= 16U; I++) { + const scudo::uptr Size = 1UL << I; + if (!Primary::canAllocate(Size)) + continue; + const scudo::uptr ClassId = Primary::SizeClassMap::getClassIdBySize(Size); + void *Pointers[NumberOfAllocations]; + for (scudo::uptr J = 0; J < NumberOfAllocations; J++) { + void *P = Cache.allocate(ClassId); + memset(P, 'B', Size); + Pointers[J] = P; + } + for (scudo::uptr J = 0; J < NumberOfAllocations; J++) + Cache.deallocate(ClassId, Pointers[J]); + } + Cache.destroy(nullptr); + Allocator->releaseToOS(); + scudo::ScopedString Str(1024); + Allocator->getStats(&Str); + Str.output(); } struct SmallRegionsConfig { @@ -166,12 +161,9 @@ TEST(ScudoPrimaryTest, Primary64OOM) { Allocator.unmapTestOnly(); } -template <typename Primary> static void testIteratePrimary() { - auto Deleter = [](Primary *P) { - P->unmapTestOnly(); - delete P; - }; - std::unique_ptr<Primary, decltype(Deleter)> Allocator(new Primary, Deleter); +TYPED_TEST(ScudoPrimaryTest, PrimaryIterate) { + using Primary = TestAllocator<TypeParam, scudo::DefaultSizeClassMap>; + std::unique_ptr<Primary> Allocator(new Primary); Allocator->init(/*ReleaseToOsInterval=*/-1); typename Primary::CacheT Cache; Cache.init(nullptr, Allocator.get()); @@ -205,50 +197,40 @@ template <typename Primary> static void testIteratePrimary() { Str.output(); } -TYPED_TEST(ScudoPrimaryTest, PrimaryIterate) { - using SizeClassMap = scudo::DefaultSizeClassMap; - testIteratePrimary<typename MakeAllocator<TypeParam, SizeClassMap>::Value>(); -} - -static std::mutex Mutex; -static std::condition_variable Cv; -static bool Ready; - -template <typename Primary> static void performAllocations(Primary *Allocator) { - static thread_local typename Primary::CacheT Cache; - Cache.init(nullptr, Allocator); - std::vector<std::pair<scudo::uptr, void *>> V; - { - std::unique_lock<std::mutex> Lock(Mutex); - while (!Ready) - Cv.wait(Lock); - } - for (scudo::uptr I = 0; I < 256U; I++) { - const scudo::uptr Size = std::rand() % Primary::SizeClassMap::MaxSize / 4; - const scudo::uptr ClassId = Primary::SizeClassMap::getClassIdBySize(Size); - void *P = Cache.allocate(ClassId); - if (P) - V.push_back(std::make_pair(ClassId, P)); - } - while (!V.empty()) { - auto Pair = V.back(); - Cache.deallocate(Pair.first, Pair.second); - V.pop_back(); - } - Cache.destroy(nullptr); -} - -template <typename Primary> static void testPrimaryThreaded() { - Ready = false; - auto Deleter = [](Primary *P) { - P->unmapTestOnly(); - delete P; - }; - std::unique_ptr<Primary, decltype(Deleter)> Allocator(new Primary, Deleter); +TYPED_TEST(ScudoPrimaryTest, PrimaryThreaded) { + using Primary = TestAllocator<TypeParam, scudo::SvelteSizeClassMap>; + std::unique_ptr<Primary> Allocator(new Primary); Allocator->init(/*ReleaseToOsInterval=*/-1); + std::mutex Mutex; + std::condition_variable Cv; + bool Ready = false; std::thread Threads[32]; for (scudo::uptr I = 0; I < ARRAY_SIZE(Threads); I++) - Threads[I] = std::thread(performAllocations<Primary>, Allocator.get()); + Threads[I] = std::thread([&]() { + static thread_local typename Primary::CacheT Cache; + Cache.init(nullptr, Allocator.get()); + std::vector<std::pair<scudo::uptr, void *>> V; + { + std::unique_lock<std::mutex> Lock(Mutex); + while (!Ready) + Cv.wait(Lock); + } + for (scudo::uptr I = 0; I < 256U; I++) { + const scudo::uptr Size = + std::rand() % Primary::SizeClassMap::MaxSize / 4; + const scudo::uptr ClassId = + Primary::SizeClassMap::getClassIdBySize(Size); + void *P = Cache.allocate(ClassId); + if (P) + V.push_back(std::make_pair(ClassId, P)); + } + while (!V.empty()) { + auto Pair = V.back(); + Cache.deallocate(Pair.first, Pair.second); + V.pop_back(); + } + Cache.destroy(nullptr); + }); { std::unique_lock<std::mutex> Lock(Mutex); Ready = true; @@ -262,20 +244,12 @@ template <typename Primary> static void testPrimaryThreaded() { Str.output(); } -TYPED_TEST(ScudoPrimaryTest, PrimaryThreaded) { - using SizeClassMap = scudo::SvelteSizeClassMap; - testPrimaryThreaded<typename MakeAllocator<TypeParam, SizeClassMap>::Value>(); -} - // Through a simple allocation that spans two pages, verify that releaseToOS // actually releases some bytes (at least one page worth). This is a regression // test for an error in how the release criteria were computed. -template <typename Primary> static void testReleaseToOS() { - auto Deleter = [](Primary *P) { - P->unmapTestOnly(); - delete P; - }; - std::unique_ptr<Primary, decltype(Deleter)> Allocator(new Primary, Deleter); +TYPED_TEST(ScudoPrimaryTest, ReleaseToOS) { + using Primary = TestAllocator<TypeParam, scudo::DefaultSizeClassMap>; + std::unique_ptr<Primary> Allocator(new Primary); Allocator->init(/*ReleaseToOsInterval=*/-1); typename Primary::CacheT Cache; Cache.init(nullptr, Allocator.get()); @@ -288,8 +262,3 @@ template <typename Primary> static void testReleaseToOS() { Cache.destroy(nullptr); EXPECT_GT(Allocator->releaseToOS(), 0U); } - -TYPED_TEST(ScudoPrimaryTest, ReleaseToOS) { - using SizeClassMap = scudo::DefaultSizeClassMap; - testReleaseToOS<typename MakeAllocator<TypeParam, SizeClassMap>::Value>(); -} </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tx1/llvm-release-aarch64-spec2k6-O3_LTO - Build # 8 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tx1/llvm-release-aarch64-spec2k6-O3_LTO. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tx1/llvm-release-aarch64-spec2k6-O3_LTO Culprit: <cut> commit ad558a4ff7cd61081cfeaabff1dbc8c0a9afa92b Author: Carl Ritson <carl.ritson(a)amd.com> Date: Tue May 11 12:14:01 2021 +0900 [AMDGPU] Pre-commit tests for D102211 </cut> Results regressed to (for first_bad == ad558a4ff7cd61081cfeaabff1dbc8c0a9afa92b) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_LTO artifacts/build-ad558a4ff7cd61081cfeaabff1dbc8c0a9afa92b/results_id: 1 # 473.astar,astar_base.default regressed by 104 from (for last_good == d8ec2b183e9243366e3a0cd1116dbe879856b333) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_LTO artifacts/build-d8ec2b183e9243366e3a0cd1116dbe879856b333/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Results ID of last_good: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-release-aarch64-spec2k6-O3_LTO/4218 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Results ID of first_bad: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-release-aarch64-spec2k6-O3_LTO/4219 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-ad558a4ff7cd61081cfeaabff1dbc8c0a9afa92b cd investigate-llvm-ad558a4ff7cd61081cfeaabff1dbc8c0a9afa92b git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach ad558a4ff7cd61081cfeaabff1dbc8c0a9afa92b ../artifacts/test.sh # Reproduce last_good build git checkout --detach d8ec2b183e9243366e3a0cd1116dbe879856b333 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Full commit (up to 1000 lines): <cut> commit ad558a4ff7cd61081cfeaabff1dbc8c0a9afa92b Author: Carl Ritson <carl.ritson(a)amd.com> Date: Tue May 11 12:14:01 2021 +0900 [AMDGPU] Pre-commit tests for D102211 --- llvm/test/CodeGen/AMDGPU/hard-clauses.mir | 36 +++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/hard-clauses.mir b/llvm/test/CodeGen/AMDGPU/hard-clauses.mir index 506f9a77c177..e6ca33341bfb 100644 --- a/llvm/test/CodeGen/AMDGPU/hard-clauses.mir +++ b/llvm/test/CodeGen/AMDGPU/hard-clauses.mir @@ -209,3 +209,39 @@ body: | $vgpr79 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 316, 0, 0, 0, implicit $exec $vgpr80 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 320, 0, 0, 0, implicit $exec ... + +--- +name: mimg_nsa +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; CHECK-LABEL: name: mimg_nsa + ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; CHECK: BUNDLE implicit-def $vgpr10_vgpr11_vgpr12_vgpr13, implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit-def $vgpr13, implicit-def $vgpr13_lo16, implicit-def $vgpr13_hi16, implicit-def $vgpr10_vgpr11, implicit-def $vgpr10_vgpr11_vgpr12, implicit-def $vgpr11_vgpr12, implicit-def $vgpr11_vgpr12_vgpr13, implicit-def $vgpr12_vgpr13, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr23, implicit-def $vgpr23_lo16, implicit-def $vgpr23_hi16, implicit-def $vgpr20_vgpr21, implicit-def $vgpr20_vgpr21_vgpr22, implicit-def $vgpr21_vgpr22, implicit-def $vgpr21_vgpr22_vgpr23, implicit-def $vgpr22_vgpr23, implicit $vgpr3, implicit $vgpr8, implicit $vgpr7, implicit $vgpr5, implicit $vgpr4, implicit $vgpr6, implicit $vgpr0, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec { + ; CHECK: S_CLAUSE 1 + ; CHECK: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) + ; CHECK: $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) + ; CHECK: } + $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) + $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) +... + +--- +name: mimg_nsa_mixed +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; CHECK-LABEL: name: mimg_nsa_mixed + ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; CHECK: BUNDLE implicit-def $vgpr10_vgpr11_vgpr12_vgpr13, implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit-def $vgpr13, implicit-def $vgpr13_lo16, implicit-def $vgpr13_hi16, implicit-def $vgpr10_vgpr11, implicit-def $vgpr10_vgpr11_vgpr12, implicit-def $vgpr11_vgpr12, implicit-def $vgpr11_vgpr12_vgpr13, implicit-def $vgpr12_vgpr13, implicit-def $vgpr14, implicit-def $vgpr14_lo16, implicit-def $vgpr14_hi16, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr23, implicit-def $vgpr23_lo16, implicit-def $vgpr23_hi16, implicit-def $vgpr20_vgpr21, implicit-def $vgpr20_vgpr21_vgpr22, implicit-def $vgpr21_vgpr22, implicit-def $vgpr21_vgpr22_vgpr23, implicit-def $vgpr22_vgpr23, implicit $vgpr3, implicit $vgpr8, implicit $vgpr7, implicit $vgpr5, implicit $vgpr4, implicit $vgpr6, implicit $vgpr0, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec, implicit $vgpr5_vgpr6 { + ; CHECK: S_CLAUSE 2 + ; CHECK: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) + ; CHECK: $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") + ; CHECK: $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) + ; CHECK: } + $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) + $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") + $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) +... </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tk1/llvm-release-arm-spec2k6-O3 - Build # 11 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O3. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O3 Culprit: <cut> commit 99203f2004d031f2ef22f01e3c569d2775de1836 Author: Alexey Bataev <a.bataev(a)outlook.com> Date: Tue Mar 23 13:22:58 2021 -0700 [Analysis]Add getPointersDiff function to improve compile time. Added getPointersDiff function to LoopAccessAnalysis and used it instead direct calculatoin of the distance between pointers and/or isConsecutiveAccess function in SLP vectorizer to improve compile time and detection of stores consecutive chains. Part of D57059 Differential Revision: https://reviews.llvm.org/D98967 </cut> Results regressed to (for first_bad == 99203f2004d031f2ef22f01e3c569d2775de1836) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_marm artifacts/build-99203f2004d031f2ef22f01e3c569d2775de1836/results_id: 1 # 458.sjeng,sjeng_base.default regressed by 103 from (for last_good == 4157a079afbf7fa5c3ce3ac0e9f4541f89188ae2) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_marm artifacts/build-4157a079afbf7fa5c3ce3ac0e9f4541f89188ae2/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of last_good: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O3/4192 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of first_bad: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O3/4194 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-99203f2004d031f2ef22f01e3c569d2775de1836 cd investigate-llvm-99203f2004d031f2ef22f01e3c569d2775de1836 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach 99203f2004d031f2ef22f01e3c569d2775de1836 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 4157a079afbf7fa5c3ce3ac0e9f4541f89188ae2 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Full commit (up to 1000 lines): <cut> commit 99203f2004d031f2ef22f01e3c569d2775de1836 Author: Alexey Bataev <a.bataev(a)outlook.com> Date: Tue Mar 23 13:22:58 2021 -0700 [Analysis]Add getPointersDiff function to improve compile time. Added getPointersDiff function to LoopAccessAnalysis and used it instead direct calculatoin of the distance between pointers and/or isConsecutiveAccess function in SLP vectorizer to improve compile time and detection of stores consecutive chains. Part of D57059 Differential Revision: https://reviews.llvm.org/D98967 --- llvm/include/llvm/Analysis/LoopAccessAnalysis.h | 9 + llvm/lib/Analysis/LoopAccessAnalysis.cpp | 198 ++++++++++------------ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 79 ++++++--- llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll | 17 +- 4 files changed, 163 insertions(+), 140 deletions(-) diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index 13fbe884eddf..39acfd5bbbee 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -679,6 +679,15 @@ int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap = ValueToValueMap(), bool Assume = false, bool ShouldCheckWrap = true); +/// Returns the distance between the pointers \p PtrA and \p PtrB iff they are +/// compatible and it is possible to calculate the distance between them. This +/// is a simple API that does not depend on the analysis pass. +/// \param StrictCheck Ensure that the calculated distance matches the +/// type-based one after all the bitcasts removal in the provided pointers. +Optional<int> getPointersDiff(Value *PtrA, Value *PtrB, const DataLayout &DL, + ScalarEvolution &SE, bool StrictCheck = false, + bool CheckType = true); + /// Attempt to sort the pointers in \p VL and return the sorted indices /// in \p SortedIndices, if reordering is required. /// diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index e632fe25c24c..997d4474a448 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1124,139 +1124,123 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, return Stride; } +Optional<int> llvm::getPointersDiff(Value *PtrA, Value *PtrB, + const DataLayout &DL, ScalarEvolution &SE, + bool StrictCheck, bool CheckType) { + assert(PtrA && PtrB && "Expected non-nullptr pointers."); + // Make sure that A and B are different pointers. + if (PtrA == PtrB) + return 0; + + // Make sure that PtrA and PtrB have the same type if required + if (CheckType && PtrA->getType() != PtrB->getType()) + return None; + + unsigned ASA = PtrA->getType()->getPointerAddressSpace(); + unsigned ASB = PtrB->getType()->getPointerAddressSpace(); + + // Check that the address spaces match. + if (ASA != ASB) + return None; + unsigned IdxWidth = DL.getIndexSizeInBits(ASA); + + APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0); + Value *PtrA1 = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); + Value *PtrB1 = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); + + int Val; + if (PtrA1 == PtrB1) { + // Retrieve the address space again as pointer stripping now tracks through + // `addrspacecast`. + ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace(); + ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace(); + // Check that the address spaces match and that the pointers are valid. + if (ASA != ASB) + return None; + + IdxWidth = DL.getIndexSizeInBits(ASA); + OffsetA = OffsetA.sextOrTrunc(IdxWidth); + OffsetB = OffsetB.sextOrTrunc(IdxWidth); + + OffsetB -= OffsetA; + Val = OffsetB.getSExtValue(); + } else { + // Otherwise compute the distance with SCEV between the base pointers. + const SCEV *PtrSCEVA = SE.getSCEV(PtrA); + const SCEV *PtrSCEVB = SE.getSCEV(PtrB); + const auto *Diff = + dyn_cast<SCEVConstant>(SE.getMinusSCEV(PtrSCEVB, PtrSCEVA)); + if (!Diff) + return None; + Val = Diff->getAPInt().getSExtValue(); + } + Type *Ty = cast<PointerType>(PtrA->getType())->getElementType(); + int Size = DL.getTypeStoreSize(Ty); + int Dist = Val / Size; + + // Ensure that the calculated distance matches the type-based one after all + // the bitcasts removal in the provided pointers. + if (!StrictCheck || Dist * Size == Val) + return Dist; + return None; +} + bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl<unsigned> &SortedIndices) { assert(llvm::all_of( VL, [](const Value *V) { return V->getType()->isPointerTy(); }) && "Expected list of pointer operands."); - SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs; - OffValPairs.reserve(VL.size()); - // Walk over the pointers, and map each of them to an offset relative to // first pointer in the array. Value *Ptr0 = VL[0]; - const SCEV *Scev0 = SE.getSCEV(Ptr0); - Value *Obj0 = getUnderlyingObject(Ptr0); - - llvm::SmallSet<int64_t, 4> Offsets; - for (auto *Ptr : VL) { - // TODO: Outline this code as a special, more time consuming, version of - // computeConstantDifference() function. - if (Ptr->getType()->getPointerAddressSpace() != - Ptr0->getType()->getPointerAddressSpace()) - return false; - // If a pointer refers to a different underlying object, bail - the - // pointers are by definition incomparable. - Value *CurrObj = getUnderlyingObject(Ptr); - if (CurrObj != Obj0) - return false; - const SCEV *Scev = SE.getSCEV(Ptr); - const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Scev, Scev0)); - // The pointers may not have a constant offset from each other, or SCEV - // may just not be smart enough to figure out they do. Regardless, - // there's nothing we can do. + using DistOrdPair = std::pair<int64_t, int>; + auto Compare = [](const DistOrdPair &L, const DistOrdPair &R) { + return L.first < R.first; + }; + std::set<DistOrdPair, decltype(Compare)> Offsets(Compare); + Offsets.emplace(0, 0); + int Cnt = 1; + bool IsConsecutive = true; + for (auto *Ptr : VL.drop_front()) { + Optional<int> Diff = getPointersDiff(Ptr0, Ptr, DL, SE); if (!Diff) return false; // Check if the pointer with the same offset is found. - int64_t Offset = Diff->getAPInt().getSExtValue(); - if (!Offsets.insert(Offset).second) + int64_t Offset = *Diff; + auto Res = Offsets.emplace(Offset, Cnt); + if (!Res.second) return false; - OffValPairs.emplace_back(Offset, Ptr); + // Consecutive order if the inserted element is the last one. + IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end(); + ++Cnt; } SortedIndices.clear(); - SortedIndices.resize(VL.size()); - std::iota(SortedIndices.begin(), SortedIndices.end(), 0); - - // Sort the memory accesses and keep the order of their uses in UseOrder. - llvm::stable_sort(SortedIndices, [&](unsigned Left, unsigned Right) { - return OffValPairs[Left].first < OffValPairs[Right].first; - }); - - // Check if the order is consecutive already. - if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) { - return I == SortedIndices[I]; - })) - SortedIndices.clear(); - + if (!IsConsecutive) { + // Fill SortedIndices array only if it is non-consecutive. + SortedIndices.resize(VL.size()); + Cnt = 0; + for (const std::pair<int64_t, int> &Pair : Offsets) { + IsConsecutive = IsConsecutive && Cnt == Pair.second; + SortedIndices[Cnt] = Pair.second; + ++Cnt; + } + } return true; } -/// Take the address space operand from the Load/Store instruction. -/// Returns -1 if this is not a valid Load/Store instruction. -static unsigned getAddressSpaceOperand(Value *I) { - if (LoadInst *L = dyn_cast<LoadInst>(I)) - return L->getPointerAddressSpace(); - if (StoreInst *S = dyn_cast<StoreInst>(I)) - return S->getPointerAddressSpace(); - return -1; -} - /// Returns true if the memory operations \p A and \p B are consecutive. bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType) { Value *PtrA = getLoadStorePointerOperand(A); Value *PtrB = getLoadStorePointerOperand(B); - unsigned ASA = getAddressSpaceOperand(A); - unsigned ASB = getAddressSpaceOperand(B); - - // Check that the address spaces match and that the pointers are valid. - if (!PtrA || !PtrB || (ASA != ASB)) - return false; - - // Make sure that A and B are different pointers. - if (PtrA == PtrB) - return false; - - // Make sure that A and B have the same type if required. - if (CheckType && PtrA->getType() != PtrB->getType()) + if (!PtrA || !PtrB) return false; - - unsigned IdxWidth = DL.getIndexSizeInBits(ASA); - Type *Ty = cast<PointerType>(PtrA->getType())->getElementType(); - - APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0); - PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); - PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); - - // Retrieve the address space again as pointer stripping now tracks through - // `addrspacecast`. - ASA = cast<PointerType>(PtrA->getType())->getAddressSpace(); - ASB = cast<PointerType>(PtrB->getType())->getAddressSpace(); - // Check that the address spaces match and that the pointers are valid. - if (ASA != ASB) - return false; - - IdxWidth = DL.getIndexSizeInBits(ASA); - OffsetA = OffsetA.sextOrTrunc(IdxWidth); - OffsetB = OffsetB.sextOrTrunc(IdxWidth); - - APInt Size(IdxWidth, DL.getTypeStoreSize(Ty)); - - // OffsetDelta = OffsetB - OffsetA; - const SCEV *OffsetSCEVA = SE.getConstant(OffsetA); - const SCEV *OffsetSCEVB = SE.getConstant(OffsetB); - const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA); - const APInt &OffsetDelta = cast<SCEVConstant>(OffsetDeltaSCEV)->getAPInt(); - - // Check if they are based on the same pointer. That makes the offsets - // sufficient. - if (PtrA == PtrB) - return OffsetDelta == Size; - - // Compute the necessary base pointer delta to have the necessary final delta - // equal to the size. - // BaseDelta = Size - OffsetDelta; - const SCEV *SizeSCEV = SE.getConstant(Size); - const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV); - - // Otherwise compute the distance with SCEV between the base pointers. - const SCEV *PtrSCEVA = SE.getSCEV(PtrA); - const SCEV *PtrSCEVB = SE.getSCEV(PtrB); - const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta); - return X == PtrSCEVB; + Optional<int> Diff = + getPointersDiff(PtrA, PtrB, DL, SE, /*StrictCheck=*/true, CheckType); + return Diff && *Diff == 1; } MemoryDepChecker::VectorizationSafetyStatus diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 385b6f30dc0f..78d2ea0032db 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -941,10 +941,16 @@ public: ScalarEvolution &SE) { auto *LI1 = dyn_cast<LoadInst>(V1); auto *LI2 = dyn_cast<LoadInst>(V2); - if (LI1 && LI2) - return isConsecutiveAccess(LI1, LI2, DL, SE) - ? VLOperands::ScoreConsecutiveLoads - : VLOperands::ScoreFail; + if (LI1 && LI2) { + if (LI1->getParent() != LI2->getParent()) + return VLOperands::ScoreFail; + + Optional<int> Dist = + getPointersDiff(LI1->getPointerOperand(), LI2->getPointerOperand(), + DL, SE, /*StrictCheck=*/true); + return (Dist && *Dist == 1) ? VLOperands::ScoreConsecutiveLoads + : VLOperands::ScoreFail; + } auto *C1 = dyn_cast<Constant>(V1); auto *C2 = dyn_cast<Constant>(V2); @@ -2871,13 +2877,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, Ptr0 = PointerOps[CurrentOrder.front()]; PtrN = PointerOps[CurrentOrder.back()]; } - const SCEV *Scev0 = SE->getSCEV(Ptr0); - const SCEV *ScevN = SE->getSCEV(PtrN); - const auto *Diff = - dyn_cast<SCEVConstant>(SE->getMinusSCEV(ScevN, Scev0)); - uint64_t Size = DL->getTypeAllocSize(ScalarTy); + Optional<int> Diff = getPointersDiff(Ptr0, PtrN, *DL, *SE); // Check that the sorted loads are consecutive. - if (Diff && Diff->getAPInt() == (VL.size() - 1) * Size) { + if (static_cast<unsigned>(*Diff) == VL.size() - 1) { if (CurrentOrder.empty()) { // Original loads are consecutive and does not require reordering. ++NumOpsWantToKeepOriginalOrder; @@ -3150,13 +3152,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, Ptr0 = PointerOps[CurrentOrder.front()]; PtrN = PointerOps[CurrentOrder.back()]; } - const SCEV *Scev0 = SE->getSCEV(Ptr0); - const SCEV *ScevN = SE->getSCEV(PtrN); - const auto *Diff = - dyn_cast<SCEVConstant>(SE->getMinusSCEV(ScevN, Scev0)); - uint64_t Size = DL->getTypeAllocSize(ScalarTy); + Optional<int> Dist = getPointersDiff(Ptr0, PtrN, *DL, *SE); // Check that the sorted pointer operands are consecutive. - if (Diff && Diff->getAPInt() == (VL.size() - 1) * Size) { + if (static_cast<unsigned>(*Dist) == VL.size() - 1) { if (CurrentOrder.empty()) { // Original stores are consecutive and does not require reordering. ++NumOpsWantToKeepOriginalOrder; @@ -6107,20 +6105,41 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores, int E = Stores.size(); SmallBitVector Tails(E, false); - SmallVector<int, 16> ConsecutiveChain(E, E + 1); int MaxIter = MaxStoreLookup.getValue(); + SmallVector<std::pair<int, int>, 16> ConsecutiveChain( + E, std::make_pair(E, INT_MAX)); + SmallVector<SmallBitVector, 4> CheckedPairs(E, SmallBitVector(E, false)); int IterCnt; auto &&FindConsecutiveAccess = [this, &Stores, &Tails, &IterCnt, MaxIter, + &CheckedPairs, &ConsecutiveChain](int K, int Idx) { if (IterCnt >= MaxIter) return true; + if (CheckedPairs[Idx].test(K)) + return ConsecutiveChain[K].second == 1 && + ConsecutiveChain[K].first == Idx; ++IterCnt; - if (!isConsecutiveAccess(Stores[K], Stores[Idx], *DL, *SE)) + CheckedPairs[Idx].set(K); + CheckedPairs[K].set(Idx); + Optional<int> Diff = getPointersDiff(Stores[K]->getPointerOperand(), + Stores[Idx]->getPointerOperand(), *DL, + *SE, /*StrictCheck=*/true); + if (!Diff || *Diff == 0) + return false; + int Val = *Diff; + if (Val < 0) { + if (ConsecutiveChain[Idx].second > -Val) { + Tails.set(K); + ConsecutiveChain[Idx] = std::make_pair(K, -Val); + } + return false; + } + if (ConsecutiveChain[K].second <= Val) return false; Tails.set(Idx); - ConsecutiveChain[K] = Idx; - return true; + ConsecutiveChain[K] = std::make_pair(Idx, Val); + return Val == 1; }; // Do a quadratic search on all of the given stores in reverse order and find // all of the pairs of stores that follow each other. @@ -6140,17 +6159,31 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores, // For stores that start but don't end a link in the chain: for (int Cnt = E; Cnt > 0; --Cnt) { int I = Cnt - 1; - if (ConsecutiveChain[I] == E + 1 || Tails.test(I)) + if (ConsecutiveChain[I].first == E || Tails.test(I)) continue; // We found a store instr that starts a chain. Now follow the chain and try // to vectorize it. BoUpSLP::ValueList Operands; // Collect the chain into a list. - while (I != E + 1 && !VectorizedStores.count(Stores[I])) { + while (I != E && !VectorizedStores.count(Stores[I])) { Operands.push_back(Stores[I]); + Tails.set(I); + if (ConsecutiveChain[I].second != 1) { + // Mark the new end in the chain and go back, if required. It might be + // required if the original stores come in reversed order, for example. + if (ConsecutiveChain[I].first != E && + Tails.test(ConsecutiveChain[I].first) && + !VectorizedStores.count(Stores[ConsecutiveChain[I].first])) { + Tails.reset(ConsecutiveChain[I].first); + if (Cnt < ConsecutiveChain[I].first + 2) + Cnt = ConsecutiveChain[I].first + 2; + } + break; + } // Move to the next value in the chain. - I = ConsecutiveChain[I]; + I = ConsecutiveChain[I].first; } + assert(!Operands.empty() && "Expected non-empty list of stores."); unsigned MaxVecRegSize = R.getMaxVecRegSize(); unsigned EltSize = R.getVectorElementSize(Operands[0]); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll index 267cf1a02c29..e28362894910 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -slp-vectorizer -mattr=+sse2 -S | FileCheck %s --check-prefix=SSE -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -slp-vectorizer -mattr=+avx -S | FileCheck %s --check-prefix=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -slp-vectorizer -mattr=+avx2 -S | FileCheck %s --check-prefix=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -mattr=+sse2 -S | FileCheck %s --check-prefix=SSE +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -mattr=+avx -S | FileCheck %s --check-prefix=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -mattr=+avx2 -S | FileCheck %s --check-prefix=AVX %class.1 = type { %class.2 } %class.2 = type { %"class.3" } @@ -117,13 +117,10 @@ define void @pr35497() local_unnamed_addr #0 { ; AVX-NEXT: [[ARRAYIDX2_6:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 0 ; AVX-NEXT: [[TMP10:%.*]] = bitcast i64* [[ARRAYIDX2_6]] to <2 x i64>* ; AVX-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[TMP10]], align 1 -; AVX-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 -; AVX-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP11]], i32 0 -; AVX-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP5]], i32 1 -; AVX-NEXT: [[TMP14:%.*]] = lshr <2 x i64> [[TMP13]], <i64 6, i64 6> -; AVX-NEXT: [[TMP15:%.*]] = add nuw nsw <2 x i64> [[TMP9]], [[TMP14]] -; AVX-NEXT: [[TMP16:%.*]] = bitcast i64* [[ARRAYIDX2_2]] to <2 x i64>* -; AVX-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[TMP16]], align 1 +; AVX-NEXT: [[TMP11:%.*]] = lshr <2 x i64> [[TMP4]], <i64 6, i64 6> +; AVX-NEXT: [[TMP12:%.*]] = add nuw nsw <2 x i64> [[TMP9]], [[TMP11]] +; AVX-NEXT: [[TMP13:%.*]] = bitcast i64* [[ARRAYIDX2_2]] to <2 x i64>* +; AVX-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* [[TMP13]], align 1 ; AVX-NEXT: ret void ; entry: </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tx1/llvm-master-aarch64-spec2k6-O2_LTO - Build # 24 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O2_LTO. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O2_LTO Culprit: <cut> commit 643ce61fb3c2c730b7ecead4a489eaeef3f053ea Author: Akira Hatanaka <ahatanaka(a)apple.com> Date: Wed Aug 11 12:55:28 2021 -0700 [ObjC][ARC] Don't form a StoreStrong call if it is unsafe to move the release call findSafeStoreForStoreStrongContraction checks whether it's safe to move the release call to the store by inspecting all instructions between the two, but was ignoring retain instructions. This was causing objects to be released and deallocated before they were retained. rdar://81668577 </cut> Results regressed to (for first_bad == 643ce61fb3c2c730b7ecead4a489eaeef3f053ea) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2_LTO artifacts/build-643ce61fb3c2c730b7ecead4a489eaeef3f053ea/results_id: 1 # 433.milc,milc_base.default regressed by 103 from (for last_good == 767496d19cb9a1fbba57ff08095faa161998ee36) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2_LTO artifacts/build-767496d19cb9a1fbba57ff08095faa161998ee36/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Results ID of last_good: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O2_LTO/4172 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Results ID of first_bad: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O2_LTO/4171 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-643ce61fb3c2c730b7ecead4a489eaeef3f053ea cd investigate-llvm-643ce61fb3c2c730b7ecead4a489eaeef3f053ea git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach 643ce61fb3c2c730b7ecead4a489eaeef3f053ea ../artifacts/test.sh # Reproduce last_good build git checkout --detach 767496d19cb9a1fbba57ff08095faa161998ee36 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Full commit (up to 1000 lines): <cut> commit 643ce61fb3c2c730b7ecead4a489eaeef3f053ea Author: Akira Hatanaka <ahatanaka(a)apple.com> Date: Wed Aug 11 12:55:28 2021 -0700 [ObjC][ARC] Don't form a StoreStrong call if it is unsafe to move the release call findSafeStoreForStoreStrongContraction checks whether it's safe to move the release call to the store by inspecting all instructions between the two, but was ignoring retain instructions. This was causing objects to be released and deallocated before they were retained. rdar://81668577 --- llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp | 21 ++++++++++++--------- .../test/Transforms/ObjCARC/contract-storestrong.ll | 19 +++++++++++++++++++ 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp index 62161b5b6b40..577973c80601 100644 --- a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -226,13 +226,6 @@ static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load, // of Inst. ARCInstKind Class = GetBasicARCInstKind(Inst); - // If Inst is an unrelated retain, we don't care about it. - // - // TODO: This is one area where the optimization could be made more - // aggressive. - if (IsRetain(Class)) - continue; - // If we have seen the store, but not the release... if (Store) { // We need to make sure that it is safe to move the release from its @@ -248,8 +241,18 @@ static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load, return nullptr; } - // Ok, now we know we have not seen a store yet. See if Inst can write to - // our load location, if it can not, just ignore the instruction. + // Ok, now we know we have not seen a store yet. + + // If Inst is a retain, we don't care about it as it doesn't prevent moving + // the load to the store. + // + // TODO: This is one area where the optimization could be made more + // aggressive. + if (IsRetain(Class)) + continue; + + // See if Inst can write to our load location, if it can not, just ignore + // the instruction. if (!isModSet(AA->getModRefInfo(Inst, Loc))) continue; diff --git a/llvm/test/Transforms/ObjCARC/contract-storestrong.ll b/llvm/test/Transforms/ObjCARC/contract-storestrong.ll index eff0a6fdf900..9c45e3334d83 100644 --- a/llvm/test/Transforms/ObjCARC/contract-storestrong.ll +++ b/llvm/test/Transforms/ObjCARC/contract-storestrong.ll @@ -256,6 +256,25 @@ define i8* @test13(i8* %a0, i8* %a1, i8** %addr, i8* %new) { ret i8* %retained } +; Cannot form a storeStrong call because it's unsafe to move the release call to +; the store. + +; CHECK-LABEL: define void @test14( +; CHECK: %[[V0:.*]] = load i8*, i8** %a +; CHECK: %[[V1:.*]] = call i8* @llvm.objc.retain(i8* %p) +; CHECK: store i8* %[[V1]], i8** %a +; CHECK: %[[V2:.*]] = call i8* @llvm.objc.retain(i8* %[[V0]]) +; CHECK: call void @llvm.objc.release(i8* %[[V2]]) + +define void @test14(i8** %a, i8* %p) { + %v0 = load i8*, i8** %a, align 8 + %v1 = call i8* @llvm.objc.retain(i8* %p) + store i8* %p, i8** %a, align 8 + %v2 = call i8* @llvm.objc.retain(i8* %v0) + call void @llvm.objc.release(i8* %v0) + ret void +} + !0 = !{} ; CHECK: attributes [[NUW]] = { nounwind } </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tk1/llvm-release-arm-spec2k6-O2_LTO - Build # 11 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O2_LTO. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O2_LTO Culprit: <cut> commit 176379e0c8f9dbde2b357fb3b6a6802b83282e71 Author: Alex Zinenko <zinenko(a)google.com> Date: Fri Feb 12 12:53:27 2021 +0100 [mlir] Use the interface-based translation for LLVM "intrinsic" dialects Port the translation of five dialects that define LLVM IR intrinsics (LLVMAVX512, LLVMArmNeon, LLVMArmSVE, NVVM, ROCDL) to the new dialect interface-based mechanism. This allows us to remove individual translations that were created for each of these dialects and just use one common MLIR-to-LLVM-IR translation that potentially supports all dialects instead, based on what is registered and including any combination of translatable dialects. This removal was one of the main goals of the refactoring. To support the addition of GPU-related metadata, the translation interface is extended with the `amendOperation` function that allows the interface implementation to post-process any translated operation with dialect attributes from the dialect for which the interface is implemented regardless of the operation's dialect. This is currently applied to "kernel" functions, but can be used to construct other metadata in dialect-specific ways without necessarily affecting operations. Depends On D96591, D96504 Reviewed By: nicolasvasilache Differential Revision: https://reviews.llvm.org/D96592 </cut> Results regressed to (for first_bad == 176379e0c8f9dbde2b357fb3b6a6802b83282e71) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2_LTO_marm artifacts/build-176379e0c8f9dbde2b357fb3b6a6802b83282e71/results_id: 1 # 482.sphinx3,sphinx_livepretend_base.default regressed by 109 from (for last_good == 2d728bbff5c688284b8b8306ecfd3000b0ab8bb1) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2_LTO_marm artifacts/build-2d728bbff5c688284b8b8306ecfd3000b0ab8bb1/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of last_good: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O2_LTO/4128 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of first_bad: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O2_LTO/4125 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-176379e0c8f9dbde2b357fb3b6a6802b83282e71 cd investigate-llvm-176379e0c8f9dbde2b357fb3b6a6802b83282e71 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach 176379e0c8f9dbde2b357fb3b6a6802b83282e71 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 2d728bbff5c688284b8b8306ecfd3000b0ab8bb1 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Full commit (up to 1000 lines): <cut> commit 176379e0c8f9dbde2b357fb3b6a6802b83282e71 Author: Alex Zinenko <zinenko(a)google.com> Date: Fri Feb 12 12:53:27 2021 +0100 [mlir] Use the interface-based translation for LLVM "intrinsic" dialects Port the translation of five dialects that define LLVM IR intrinsics (LLVMAVX512, LLVMArmNeon, LLVMArmSVE, NVVM, ROCDL) to the new dialect interface-based mechanism. This allows us to remove individual translations that were created for each of these dialects and just use one common MLIR-to-LLVM-IR translation that potentially supports all dialects instead, based on what is registered and including any combination of translatable dialects. This removal was one of the main goals of the refactoring. To support the addition of GPU-related metadata, the translation interface is extended with the `amendOperation` function that allows the interface implementation to post-process any translated operation with dialect attributes from the dialect for which the interface is implemented regardless of the operation's dialect. This is currently applied to "kernel" functions, but can be used to construct other metadata in dialect-specific ways without necessarily affecting operations. Depends On D96591, D96504 Reviewed By: nicolasvasilache Differential Revision: https://reviews.llvm.org/D96592 --- .../test/Standalone/standalone-translate.mlir | 3 - mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td | 13 ++- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 3 +- mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 2 +- mlir/include/mlir/InitAllTranslations.h | 10 -- mlir/include/mlir/Target/LLVMIR.h | 4 +- .../LLVMAVX512/LLVMAVX512ToLLVMIRTranslation.h | 37 ++++++ .../LLVMArmNeon/LLVMArmNeonToLLVMIRTranslation.h | 37 ++++++ .../LLVMArmSVE/LLVMArmSVEToLLVMIRTranslation.h | 37 ++++++ .../Dialect/LLVMIR/LLVMToLLVMIRTranslation.h | 4 +- .../LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h | 42 +++++++ .../Dialect/OpenMP/OpenMPToLLVMIRTranslation.h | 4 +- .../Dialect/ROCDL/ROCDLToLLVMIRTranslation.h | 42 +++++++ .../mlir/Target/LLVMIR/LLVMTranslationInterface.h | 28 ++++- .../include/mlir/Target/LLVMIR/ModuleTranslation.h | 32 ++++-- mlir/include/mlir/Target/NVVMIR.h | 39 ------- mlir/include/mlir/Target/ROCDLIR.h | 40 ------- mlir/lib/Target/CMakeLists.txt | 107 +---------------- mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp | 35 +++++- mlir/lib/Target/LLVMIR/ConvertToNVVMIR.cpp | 124 -------------------- mlir/lib/Target/LLVMIR/ConvertToROCDLIR.cpp | 127 --------------------- mlir/lib/Target/LLVMIR/Dialect/CMakeLists.txt | 5 + .../LLVMIR/Dialect/LLVMAVX512/CMakeLists.txt | 16 +++ .../LLVMAVX512/LLVMAVX512ToLLVMIRTranslation.cpp | 33 ++++++ .../LLVMIR/Dialect/LLVMArmNeon/CMakeLists.txt | 16 +++ .../LLVMArmNeon/LLVMArmNeonToLLVMIRTranslation.cpp | 33 ++++++ .../LLVMIR/Dialect/LLVMArmSVE/CMakeLists.txt | 16 +++ .../LLVMArmSVE/LLVMArmSVEToLLVMIRTranslation.cpp | 33 ++++++ .../Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp | 71 ++++-------- mlir/lib/Target/LLVMIR/Dialect/NVVM/CMakeLists.txt | 16 +++ .../Dialect/NVVM/NVVMToLLVMIRTranslation.cpp | 67 +++++++++++ .../lib/Target/LLVMIR/Dialect/ROCDL/CMakeLists.txt | 16 +++ .../Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp | 69 +++++++++++ mlir/lib/Target/LLVMIR/LLVMAVX512Intr.cpp | 65 ----------- mlir/lib/Target/LLVMIR/LLVMArmNeonIntr.cpp | 65 ----------- mlir/lib/Target/LLVMIR/LLVMArmSVEIntr.cpp | 65 ----------- mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 65 +++++++---- mlir/test/Target/arm-neon.mlir | 2 +- mlir/test/Target/arm-sve.mlir | 2 +- mlir/test/Target/avx512.mlir | 2 +- mlir/test/Target/nvvmir.mlir | 2 +- mlir/test/Target/rocdl.mlir | 2 +- mlir/test/lib/Transforms/CMakeLists.txt | 13 ++- .../lib/Transforms/TestConvertGPUKernelToCubin.cpp | 25 +++- .../lib/Transforms/TestConvertGPUKernelToHsaco.cpp | 22 +++- mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp | 6 +- mlir/tools/mlir-rocm-runner/mlir-rocm-runner.cpp | 3 + mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp | 10 +- 48 files changed, 750 insertions(+), 760 deletions(-) diff --git a/mlir/examples/standalone/test/Standalone/standalone-translate.mlir b/mlir/examples/standalone/test/Standalone/standalone-translate.mlir index 2a096c38e128..16d49785ee16 100644 --- a/mlir/examples/standalone/test/Standalone/standalone-translate.mlir +++ b/mlir/examples/standalone/test/Standalone/standalone-translate.mlir @@ -1,8 +1,5 @@ // RUN: standalone-translate --help | FileCheck %s -// CHECK: --avx512-mlir-to-llvmir // CHECK: --deserialize-spirv // CHECK: --import-llvm // CHECK: --mlir-to-llvmir -// CHECK: --mlir-to-nvvmir -// CHECK: --mlir-to-rocdlir // CHECK: --serialize-spirv diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td index ad886e55b4e6..541f7ebfadfa 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td @@ -219,12 +219,13 @@ class ListIntSubst<string pattern, list<int> values> { // or result in the operation. def LLVM_IntrPatterns { string operand = - [{convertType(opInst.getOperand($0).getType())}]; + [{moduleTranslation.convertType(opInst.getOperand($0).getType())}]; string result = - [{convertType(opInst.getResult($0).getType())}]; + [{moduleTranslation.convertType(opInst.getResult($0).getType())}]; string structResult = - [{convertType(opInst.getResult(0).getType().cast<LLVM::LLVMStructType>() - .getBody()[$0])}]; + [{moduleTranslation.convertType( + opInst.getResult(0).getType().cast<LLVM::LLVMStructType>() + .getBody()[$0])}]; } @@ -259,7 +260,7 @@ class LLVM_IntrOpBase<Dialect dialect, string opName, string enumName, ListIntSubst<LLVM_IntrPatterns.operand, overloadedOperands>.lst), ", ") # [{ }); - auto operands = lookupValues(opInst.getOperands()); + auto operands = moduleTranslation.lookupValues(opInst.getOperands()); }] # !if(!gt(numResults, 0), "$res = ", "") # [{builder.CreateCall(fn, operands); }]; @@ -325,7 +326,7 @@ class LLVM_VectorReductionAcc<string mnem> { }] # !interleave(ListIntSubst<LLVM_IntrPatterns.operand, [1]>.lst, ", ") # [{ }); - auto operands = lookupValues(opInst.getOperands()); + auto operands = moduleTranslation.lookupValues(opInst.getOperands()); llvm::FastMathFlags origFM = builder.getFastMathFlags(); llvm::FastMathFlags tempFM = origFM; tempFM.setAllowReassoc($reassoc); diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index 7a2152b9a481..7dca08a43f7a 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -1083,7 +1083,8 @@ def LLVM_UndefOp : LLVM_Op<"mlir.undef", [NoSideEffect]>, def LLVM_ConstantOp : LLVM_Op<"mlir.constant", [NoSideEffect]>, - LLVM_Builder<"$res = getLLVMConstant($_resultType, $value, $_location);"> + LLVM_Builder<[{$res = getLLVMConstant($_resultType, $value, $_location, + moduleTranslation);}]> { let summary = "Defines a constant of LLVM type."; let description = [{ diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td index d5eec3cebb4a..cfb08ff465a2 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td @@ -175,7 +175,7 @@ def ROCDL_MubufStoreOp : LLVM_Type:$glc, LLVM_Type:$slc)>{ string llvmBuilder = [{ - auto vdataType = convertType(op.vdata().getType()); + auto vdataType = moduleTranslation.convertType(op.vdata().getType()); createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_buffer_store, {$vdata, $rsrc, $vindex, $offset, $glc, $slc}, {vdataType}); diff --git a/mlir/include/mlir/InitAllTranslations.h b/mlir/include/mlir/InitAllTranslations.h index 16dd113d14cd..fc319c09a8c8 100644 --- a/mlir/include/mlir/InitAllTranslations.h +++ b/mlir/include/mlir/InitAllTranslations.h @@ -20,11 +20,6 @@ void registerFromLLVMIRTranslation(); void registerFromSPIRVTranslation(); void registerToLLVMIRTranslation(); void registerToSPIRVTranslation(); -void registerToNVVMIRTranslation(); -void registerToROCDLIRTranslation(); -void registerArmNeonToLLVMIRTranslation(); -void registerAVX512ToLLVMIRTranslation(); -void registerArmSVEToLLVMIRTranslation(); // This function should be called before creating any MLIRContext if one // expects all the possible translations to be made available to the context @@ -35,11 +30,6 @@ inline void registerAllTranslations() { registerFromSPIRVTranslation(); registerToLLVMIRTranslation(); registerToSPIRVTranslation(); - registerToNVVMIRTranslation(); - registerToROCDLIRTranslation(); - registerArmNeonToLLVMIRTranslation(); - registerAVX512ToLLVMIRTranslation(); - registerArmSVEToLLVMIRTranslation(); return true; }(); (void)initOnce; diff --git a/mlir/include/mlir/Target/LLVMIR.h b/mlir/include/mlir/Target/LLVMIR.h index 2050c63df73f..10bec79f3506 100644 --- a/mlir/include/mlir/Target/LLVMIR.h +++ b/mlir/include/mlir/Target/LLVMIR.h @@ -28,14 +28,14 @@ namespace mlir { class DialectRegistry; class OwningModuleRef; class MLIRContext; -class ModuleOp; +class Operation; /// Convert the given MLIR module into LLVM IR. The LLVM context is extracted /// from the registered LLVM IR dialect. In case of error, report it /// to the error handler registered with the MLIR context, if any (obtained from /// the MLIR module), and return `nullptr`. std::unique_ptr<llvm::Module> -translateModuleToLLVMIR(ModuleOp m, llvm::LLVMContext &llvmContext, +translateModuleToLLVMIR(Operation *op, llvm::LLVMContext &llvmContext, StringRef name = "LLVMDialectModule"); /// Convert the given LLVM module into MLIR's LLVM dialect. The LLVM context is diff --git a/mlir/include/mlir/Target/LLVMIR/Dialect/LLVMAVX512/LLVMAVX512ToLLVMIRTranslation.h b/mlir/include/mlir/Target/LLVMIR/Dialect/LLVMAVX512/LLVMAVX512ToLLVMIRTranslation.h new file mode 100644 index 000000000000..e591a95f0357 --- /dev/null +++ b/mlir/include/mlir/Target/LLVMIR/Dialect/LLVMAVX512/LLVMAVX512ToLLVMIRTranslation.h @@ -0,0 +1,37 @@ +//===- LLVMAVX512ToLLVMIRTranslation.h - LLVMAVX512 to LLVM IR --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the dialect interface for translating the LLVMAVX512 +// dialect to LLVM IR. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TARGET_LLVMIR_DIALECT_LLVMAVX512_LLVMAVX512TOLLVMIRTRANSLATION_H +#define MLIR_TARGET_LLVMIR_DIALECT_LLVMAVX512_LLVMAVX512TOLLVMIRTRANSLATION_H + +#include "mlir/Target/LLVMIR/LLVMTranslationInterface.h" + +namespace mlir { + +/// Implementation of the dialect interface that converts operations belonging +/// to the LLVMAVX512 dialect to LLVM IR. +class LLVMAVX512DialectLLVMIRTranslationInterface + : public LLVMTranslationDialectInterface { +public: + using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; + + /// Translates the given operation to LLVM IR using the provided IR builder + /// and saving the state in `moduleTranslation`. + LogicalResult + convertOperation(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) const final; +}; + +} // namespace mlir + +#endif // MLIR_TARGET_LLVMIR_DIALECT_LLVMAVX512_LLVMAVX512TOLLVMIRTRANSLATION_H diff --git a/mlir/include/mlir/Target/LLVMIR/Dialect/LLVMArmNeon/LLVMArmNeonToLLVMIRTranslation.h b/mlir/include/mlir/Target/LLVMIR/Dialect/LLVMArmNeon/LLVMArmNeonToLLVMIRTranslation.h new file mode 100644 index 000000000000..7d268d155083 --- /dev/null +++ b/mlir/include/mlir/Target/LLVMIR/Dialect/LLVMArmNeon/LLVMArmNeonToLLVMIRTranslation.h @@ -0,0 +1,37 @@ +//===- LLVMArmNeonToLLVMIRTranslation.h - LLVMArmNeon to LLVMIR -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the dialect interface for translating the LLVMArmNeon +// dialect to LLVM IR. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TARGET_LLVMIR_DIALECT_LLVMARMNEON_LLVMARMNEONTOLLVMIRTRANSLATION_H +#define MLIR_TARGET_LLVMIR_DIALECT_LLVMARMNEON_LLVMARMNEONTOLLVMIRTRANSLATION_H + +#include "mlir/Target/LLVMIR/LLVMTranslationInterface.h" + +namespace mlir { + +/// Implementation of the dialect interface that converts operations belonging +/// to the LLVMArmNeon dialect to LLVM IR. +class LLVMArmNeonDialectLLVMIRTranslationInterface + : public LLVMTranslationDialectInterface { +public: + using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; + + /// Translates the given operation to LLVM IR using the provided IR builder + /// and saving the state in `moduleTranslation`. + LogicalResult + convertOperation(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) const final; +}; + +} // namespace mlir + +#endif // MLIR_TARGET_LLVMIR_DIALECT_LLVMARMNEON_LLVMARMNEONTOLLVMIRTRANSLATION_H diff --git a/mlir/include/mlir/Target/LLVMIR/Dialect/LLVMArmSVE/LLVMArmSVEToLLVMIRTranslation.h b/mlir/include/mlir/Target/LLVMIR/Dialect/LLVMArmSVE/LLVMArmSVEToLLVMIRTranslation.h new file mode 100644 index 000000000000..9d4d05b9b9bd --- /dev/null +++ b/mlir/include/mlir/Target/LLVMIR/Dialect/LLVMArmSVE/LLVMArmSVEToLLVMIRTranslation.h @@ -0,0 +1,37 @@ +//===- LLVMArmSVEToLLVMIRTranslation.h - LLVMArmSVE to LLVM IR --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the dialect interface for translating the LLVMArmSVE +// dialect to LLVM IR. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TARGET_LLVMIR_DIALECT_LLVMARMSVE_LLVMARMSVETOLLVMIRTRANSLATION_H +#define MLIR_TARGET_LLVMIR_DIALECT_LLVMARMSVE_LLVMARMSVETOLLVMIRTRANSLATION_H + +#include "mlir/Target/LLVMIR/LLVMTranslationInterface.h" + +namespace mlir { + +/// Implementation of the dialect interface that converts operations belonging +/// to the LLVMArmSVE dialect to LLVM IR. +class LLVMArmSVEDialectLLVMIRTranslationInterface + : public LLVMTranslationDialectInterface { +public: + using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; + + /// Translates the given operation to LLVM IR using the provided IR builder + /// and saving the state in `moduleTranslation`. + LogicalResult + convertOperation(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) const final; +}; + +} // namespace mlir + +#endif // MLIR_TARGET_LLVMIR_DIALECT_LLVMARMSVE_LLVMARMSVETOLLVMIRTRANSLATION_H diff --git a/mlir/include/mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h b/mlir/include/mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h index 8b72cedf1ff2..2af76e092917 100644 --- a/mlir/include/mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h +++ b/mlir/include/mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h @@ -18,8 +18,8 @@ namespace mlir { -/// Implementation of the dialect interface that converts operations beloning to -/// the LLVM dialect to LLVM IR. +/// Implementation of the dialect interface that converts operations belonging +/// to the LLVM dialect to LLVM IR. class LLVMDialectLLVMIRTranslationInterface : public LLVMTranslationDialectInterface { public: diff --git a/mlir/include/mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h b/mlir/include/mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h new file mode 100644 index 000000000000..3a8a01df84b0 --- /dev/null +++ b/mlir/include/mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h @@ -0,0 +1,42 @@ +//===- NVVMToLLVMIRTranslation.h - NVVM to LLVM IR --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the dialect interface for translating the NVVM +// dialect to LLVM IR. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TARGET_LLVMIR_DIALECT_NVVM_NVVMTOLLVMIRTRANSLATION_H +#define MLIR_TARGET_LLVMIR_DIALECT_NVVM_NVVMTOLLVMIRTRANSLATION_H + +#include "mlir/Target/LLVMIR/LLVMTranslationInterface.h" + +namespace mlir { + +/// Implementation of the dialect interface that converts operations belonging +/// to the NVVM dialect to LLVM IR. +class NVVMDialectLLVMIRTranslationInterface + : public LLVMTranslationDialectInterface { +public: + using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; + + /// Translates the given operation to LLVM IR using the provided IR builder + /// and saving the state in `moduleTranslation`. + LogicalResult + convertOperation(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) const final; + + /// Attaches module-level metadata for functions marked as kernels. + LogicalResult + amendOperation(Operation *op, NamedAttribute attribute, + LLVM::ModuleTranslation &moduleTranslation) const final; +}; + +} // namespace mlir + +#endif // MLIR_TARGET_LLVMIR_DIALECT_NVVM_NVVMTOLLVMIRTRANSLATION_H diff --git a/mlir/include/mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h b/mlir/include/mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h index 7d9eeea9462e..07721d089689 100644 --- a/mlir/include/mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h +++ b/mlir/include/mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h @@ -18,8 +18,8 @@ namespace mlir { -/// Implementation of the dialect interface that converts operations beloning to -/// the OpenMP dialect to LLVM IR. +/// Implementation of the dialect interface that converts operations belonging +/// to the OpenMP dialect to LLVM IR. class OpenMPDialectLLVMIRTranslationInterface : public LLVMTranslationDialectInterface { public: diff --git a/mlir/include/mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h b/mlir/include/mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h new file mode 100644 index 000000000000..e2211a59098f --- /dev/null +++ b/mlir/include/mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h @@ -0,0 +1,42 @@ +//===- ROCDLToLLVMIRTranslation.h - ROCDL to LLVM IR ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the dialect interface for translating the ROCDL +// dialect to LLVM IR. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TARGET_LLVMIR_DIALECT_ROCDL_ROCDLTOLLVMIRTRANSLATION_H +#define MLIR_TARGET_LLVMIR_DIALECT_ROCDL_ROCDLTOLLVMIRTRANSLATION_H + +#include "mlir/Target/LLVMIR/LLVMTranslationInterface.h" + +namespace mlir { + +/// Implementation of the dialect interface that converts operations belonging +/// to the ROCDL dialect to LLVM IR. +class ROCDLDialectLLVMIRTranslationInterface + : public LLVMTranslationDialectInterface { +public: + using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; + + /// Translates the given operation to LLVM IR using the provided IR builder + /// and saving the state in `moduleTranslation`. + LogicalResult + convertOperation(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) const final; + + /// Attaches module-level metadata for functions marked as kernels. + LogicalResult + amendOperation(Operation *op, NamedAttribute attribute, + LLVM::ModuleTranslation &moduleTranslation) const final; +}; + +} // namespace mlir + +#endif // MLIR_TARGET_LLVMIR_DIALECT_ROCDL_ROCDLTOLLVMIRTRANSLATION_H diff --git a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h index 0063beac2977..0c563e6e7d39 100644 --- a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h +++ b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h @@ -13,12 +13,14 @@ #ifndef MLIR_TARGET_LLVMIR_LLVMTRANSLATIONINTERFACE_H #define MLIR_TARGET_LLVMIR_LLVMTRANSLATIONINTERFACE_H +#include "mlir/IR/Attributes.h" #include "mlir/IR/DialectInterface.h" +#include "mlir/IR/Identifier.h" #include "mlir/Support/LogicalResult.h" namespace llvm { class IRBuilderBase; -} +} // namespace llvm namespace mlir { namespace LLVM { @@ -43,6 +45,18 @@ public: LLVM::ModuleTranslation &moduleTranslation) const { return failure(); } + + /// Hook for derived dialect interface to act on an operation that has dialect + /// attributes from the derived dialect (the operation itself may be from a + /// different dialect). This gets called after the operation has been + /// translated. The hook is expected to use moduleTranslation to look up the + /// translation results and amend the corresponding IR constructs. Does + /// nothing and succeeds by default. + virtual LogicalResult + amendOperation(Operation *op, NamedAttribute attribute, + LLVM::ModuleTranslation &moduleTranslation) const { + return success(); + } }; /// Interface collection for translation to LLVM IR, dispatches to a concrete @@ -61,6 +75,18 @@ public: return iface->convertOperation(op, builder, moduleTranslation); return failure(); } + + /// Acts on the given operation using the interface implemented by the dialect + /// of one of the operation's dialect attributes. + virtual LogicalResult + amendOperation(Operation *op, NamedAttribute attribute, + LLVM::ModuleTranslation &moduleTranslation) const { + if (const LLVMTranslationDialectInterface *iface = + getInterfaceFor(attribute.first.getDialect())) { + return iface->amendOperation(op, attribute, moduleTranslation); + } + return success(); + } }; } // namespace mlir diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h index 03b7f5336461..004524f33fa4 100644 --- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h +++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h @@ -142,18 +142,11 @@ public: /// Looks up remapped a list of remapped values. SmallVector<llvm::Value *, 8> lookupValues(ValueRange values); - /// Create an LLVM IR constant of `llvmType` from the MLIR attribute `attr`. - /// This currently supports integer, floating point, splat and dense element - /// attributes and combinations thereof. In case of error, report it to `loc` - /// and return nullptr. - llvm::Constant *getLLVMConstant(llvm::Type *llvmType, Attribute attr, - Location loc); - /// Returns the MLIR context of the module being translated. MLIRContext &getContext() { return *mlirModule->getContext(); } /// Returns the LLVM context in which the IR is being constructed. - llvm::LLVMContext &getLLVMContext() { return llvmModule->getContext(); } + llvm::LLVMContext &getLLVMContext() const { return llvmModule->getContext(); } /// Finds an LLVM IR global value that corresponds to the given MLIR operation /// defining a global value. @@ -184,6 +177,10 @@ public: LogicalResult convertBlock(Block &bb, bool ignoreArguments, llvm::IRBuilder<> &builder); + /// Gets the named metadata in the LLVM IR module being constructed, creating + /// it if it does not exist. + llvm::NamedMDNode *getOrInsertNamedModuleMetadata(StringRef name); + protected: /// Translate the given MLIR module expressed in MLIR LLVM IR dialect into an /// LLVM IR module. The MLIR LLVM IR dialect holds a pointer to an @@ -208,6 +205,9 @@ private: LogicalResult convertGlobals(); LogicalResult convertOneFunction(LLVMFuncOp func); + /// Translates dialect attributes attached to the given operation. + LogicalResult convertDialectAttributes(Operation *op); + /// Original and translated module. Operation *mlirModule; std::unique_ptr<llvm::Module> llvmModule; @@ -228,6 +228,8 @@ private: /// A stateful object used to translate types. TypeToLLVMIRTranslator typeTranslator; + /// A dialect interface collection used for dispatching the translation to + /// specific dialects. LLVMTranslationInterface iface; /// Mappings between original and translated values, used for lookups. @@ -249,6 +251,20 @@ void connectPHINodes(Region &region, const ModuleTranslation &state); /// Get a topologically sorted list of blocks of the given region. llvm::SetVector<Block *> getTopologicallySortedBlocks(Region &region); + +/// Create an LLVM IR constant of `llvmType` from the MLIR attribute `attr`. +/// This currently supports integer, floating point, splat and dense element +/// attributes and combinations thereof. In case of error, report it to `loc` +/// and return nullptr. +llvm::Constant *getLLVMConstant(llvm::Type *llvmType, Attribute attr, + Location loc, + const ModuleTranslation &moduleTranslation); + +/// Creates a call to an LLVM IR intrinsic function with the given arguments. +llvm::Value *createIntrinsicCall(llvm::IRBuilderBase &builder, + llvm::Intrinsic::ID intrinsic, + ArrayRef<llvm::Value *> args = {}, + ArrayRef<llvm::Type *> tys = {}); } // namespace detail } // namespace LLVM diff --git a/mlir/include/mlir/Target/NVVMIR.h b/mlir/include/mlir/Target/NVVMIR.h deleted file mode 100644 index 0cd7688e275b..000000000000 --- a/mlir/include/mlir/Target/NVVMIR.h +++ /dev/null @@ -1,39 +0,0 @@ -//===- NVVMIR.h - MLIR to LLVM + NVVM IR conversion -------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file declares the entry point for the MLIR to LLVM + NVVM IR conversion. -// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_TARGET_NVVMIR_H -#define MLIR_TARGET_NVVMIR_H - -#include "llvm/ADT/StringRef.h" -#include <memory> - -// Forward-declare LLVM classes. -namespace llvm { -class LLVMContext; -class Module; -} // namespace llvm - -namespace mlir { -class Operation; - -/// Convert the given LLVM-module-like operation into NVVM IR. This conversion -/// requires the registration of the LLVM IR dialect and will extract the LLVM -/// context from the registered LLVM IR dialect. In case of error, report it to -/// the error handler registered with the MLIR context, if any (obtained from -/// the MLIR module), and return `nullptr`. -std::unique_ptr<llvm::Module> -translateModuleToNVVMIR(Operation *m, llvm::LLVMContext &llvmContext, - llvm::StringRef name = "LLVMDialectModule"); - -} // namespace mlir - -#endif // MLIR_TARGET_NVVMIR_H diff --git a/mlir/include/mlir/Target/ROCDLIR.h b/mlir/include/mlir/Target/ROCDLIR.h deleted file mode 100644 index e2cb812a173d..000000000000 --- a/mlir/include/mlir/Target/ROCDLIR.h +++ /dev/null @@ -1,40 +0,0 @@ -//===- ROCDLIR.h - MLIR to LLVM + ROCDL IR conversion -----------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file declares the entry point for the MLIR to LLVM + ROCDL IR -// conversion. -// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_TARGET_ROCDLIR_H -#define MLIR_TARGET_ROCDLIR_H - -#include "llvm/ADT/StringRef.h" -#include <memory> - -// Forward-declare LLVM classes. -namespace llvm { -class LLVMContext; -class Module; -} // namespace llvm - -namespace mlir { -class Operation; - -/// Convert the given LLVM-module-like operation into ROCDL IR. This conversion -/// requires the registration of the LLVM IR dialect and will extract the LLVM -/// context from the registered LLVM IR dialect. In case of error, report it to -/// the error handler registered with the MLIR context, if any (obtained from -/// the MLIR module), and return `nullptr`. -std::unique_ptr<llvm::Module> -translateModuleToROCDLIR(Operation *m, llvm::LLVMContext &llvmContext, - llvm::StringRef name = "LLVMDialectModule"); - -} // namespace mlir - -#endif // MLIR_TARGET_ROCDLIR_H diff --git a/mlir/lib/Target/CMakeLists.txt b/mlir/lib/Target/CMakeLists.txt index e951ffade6aa..a23222d37ede 100644 --- a/mlir/lib/Target/CMakeLists.txt +++ b/mlir/lib/Target/CMakeLists.txt @@ -24,26 +24,6 @@ add_mlir_translation_library(MLIRTargetLLVMIRModuleTranslation MLIRTranslation ) -add_mlir_translation_library(MLIRTargetAVX512 - LLVMIR/LLVMAVX512Intr.cpp - - ADDITIONAL_HEADER_DIRS - ${MLIR_MAIN_INCLUDE_DIR}/mlir/Target/LLVMIR - - DEPENDS - MLIRLLVMAVX512ConversionsIncGen - - LINK_COMPONENTS - Core - - LINK_LIBS PUBLIC - MLIRIR - MLIRLLVMAVX512 - MLIRLLVMIR - MLIRTargetLLVMIR - MLIRTargetLLVMIRModuleTranslation - ) - add_mlir_translation_library(MLIRTargetLLVMIR LLVMIR/ConvertFromLLVMIR.cpp LLVMIR/ConvertToLLVMIR.cpp @@ -56,89 +36,12 @@ add_mlir_translation_library(MLIRTargetLLVMIR IRReader LINK_LIBS PUBLIC + MLIRLLVMArmNeonToLLVMIRTranslation + MLIRLLVMArmSVEToLLVMIRTranslation + MLIRLLVMAVX512ToLLVMIRTranslation MLIRLLVMToLLVMIRTranslation + MLIRNVVMToLLVMIRTranslation MLIROpenMPToLLVMIRTranslation - MLIRTargetLLVMIRModuleTranslation - ) - -add_mlir_translation_library(MLIRTargetArmNeon - LLVMIR/LLVMArmNeonIntr.cpp - - ADDITIONAL_HEADER_DIRS - ${MLIR_MAIN_INCLUDE_DIR}/mlir/Target/LLVMIR - - DEPENDS - MLIRLLVMArmNeonConversionsIncGen - - LINK_COMPONENTS - Core - - LINK_LIBS PUBLIC - MLIRIR - MLIRLLVMArmNeon - MLIRLLVMIR - MLIRTargetLLVMIR - MLIRTargetLLVMIRModuleTranslation - ) - -add_mlir_translation_library(MLIRTargetArmSVE - LLVMIR/LLVMArmSVEIntr.cpp - - ADDITIONAL_HEADER_DIRS - ${MLIR_MAIN_INCLUDE_DIR}/mlir/Target/LLVMIR - - DEPENDS - MLIRLLVMArmSVEConversionsIncGen - - LINK_COMPONENTS - Core - - LINK_LIBS PUBLIC - MLIRIR - MLIRLLVMArmSVE - MLIRLLVMIR - MLIRTargetLLVMIR - MLIRTargetLLVMIRModuleTranslation - ) - -add_mlir_translation_library(MLIRTargetNVVMIR - LLVMIR/ConvertToNVVMIR.cpp - - ADDITIONAL_HEADER_DIRS - ${MLIR_MAIN_INCLUDE_DIR}/mlir/Target/LLVMIR - - DEPENDS - intrinsics_gen - - LINK_COMPONENTS - Core - - LINK_LIBS PUBLIC - MLIRGPU - MLIRIR - MLIRLLVMIR - MLIRNVVMIR - MLIRTargetLLVMIR - MLIRTargetLLVMIRModuleTranslation - ) - -add_mlir_translation_library(MLIRTargetROCDLIR - LLVMIR/ConvertToROCDLIR.cpp - - ADDITIONAL_HEADER_DIRS - ${MLIR_MAIN_INCLUDE_DIR}/mlir/Target/LLVMIR - - DEPENDS - intrinsics_gen - - LINK_COMPONENTS - Core - - LINK_LIBS PUBLIC - MLIRGPU - MLIRIR - MLIRLLVMIR - MLIRROCDLIR - MLIRTargetLLVMIR + MLIRROCDLToLLVMIRTranslation MLIRTargetLLVMIRModuleTranslation ) diff --git a/mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp index 6b30748cc79b..42391513bacf 100644 --- a/mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp +++ b/mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp @@ -12,9 +12,19 @@ #include "mlir/Target/LLVMIR.h" +#include "mlir/Dialect/LLVMIR/LLVMAVX512Dialect.h" +#include "mlir/Dialect/LLVMIR/LLVMArmNeonDialect.h" +#include "mlir/Dialect/LLVMIR/LLVMArmSVEDialect.h" +#include "mlir/Dialect/LLVMIR/NVVMDialect.h" +#include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/Target/LLVMIR/Dialect/LLVMAVX512/LLVMAVX512ToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/LLVMArmNeon/LLVMArmNeonToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/LLVMArmSVE/LLVMArmSVEToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/ModuleTranslation.h" #include "mlir/Translation.h" @@ -26,14 +36,14 @@ using namespace mlir; std::unique_ptr<llvm::Module> -mlir::translateModuleToLLVMIR(ModuleOp m, llvm::LLVMContext &llvmContext, +mlir::translateModuleToLLVMIR(Operation *op, llvm::LLVMContext &llvmContext, StringRef name) { auto llvmModule = - LLVM::ModuleTranslation::translateModule<>(m, llvmContext, name); + LLVM::ModuleTranslation::translateModule<>(op, llvmContext, name); if (!llvmModule) - emitError(m.getLoc(), "Fail to convert MLIR to LLVM IR"); + emitError(op->getLoc(), "Fail to convert MLIR to LLVM IR"); else if (verifyModule(*llvmModule)) - emitError(m.getLoc(), "LLVM IR fails to verify"); + emitError(op->getLoc(), "LLVM IR fails to verify"); return llvmModule; } @@ -70,9 +80,24 @@ void registerToLLVMIRTranslation() { return success(); }, [](DialectRegistry &registry) { - registry.insert<omp::OpenMPDialect>(); + registry.insert<omp::OpenMPDialect, LLVM::LLVMAVX512Dialect, + LLVM::LLVMArmSVEDialect, LLVM::LLVMArmNeonDialect, + NVVM::NVVMDialect, ROCDL::ROCDLDialect>(); registry.addDialectInterface<omp::OpenMPDialect, OpenMPDialectLLVMIRTranslationInterface>(); + registry + .addDialectInterface<LLVM::LLVMAVX512Dialect, + LLVMAVX512DialectLLVMIRTranslationInterface>(); + registry.addDialectInterface< + LLVM::LLVMArmNeonDialect, + LLVMArmNeonDialectLLVMIRTranslationInterface>(); + registry + .addDialectInterface<LLVM::LLVMArmSVEDialect, + LLVMArmSVEDialectLLVMIRTranslationInterface>(); + registry.addDialectInterface<NVVM::NVVMDialect, + NVVMDialectLLVMIRTranslationInterface>(); + registry.addDialectInterface<ROCDL::ROCDLDialect, + ROCDLDialectLLVMIRTranslationInterface>(); registerLLVMDialectTranslation(registry); }); } diff --git a/mlir/lib/Target/LLVMIR/ConvertToNVVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertToNVVMIR.cpp deleted file mode 100644 index 7aee913a27d7..000000000000 --- a/mlir/lib/Target/LLVMIR/ConvertToNVVMIR.cpp +++ /dev/null @@ -1,124 +0,0 @@ -//===- ConvertToNVVMIR.cpp - MLIR to LLVM IR conversion -------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements a translation between the MLIR LLVM + NVVM dialects and -// LLVM IR with NVVM intrinsics and metadata. -// -//===----------------------------------------------------------------------===// - -#include "mlir/Target/NVVMIR.h" - -#include "mlir/Dialect/GPU/GPUDialect.h" -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" -#include "mlir/Dialect/LLVMIR/NVVMDialect.h" -#include "mlir/IR/BuiltinOps.h" -#include "mlir/Target/LLVMIR.h" -#include "mlir/Target/LLVMIR/ModuleTranslation.h" -#include "mlir/Translation.h" - -#include "llvm/ADT/StringRef.h" -#include "llvm/IR/IntrinsicsNVPTX.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/ToolOutputFile.h" - -using namespace mlir; - -static llvm::Value *createIntrinsicCall(llvm::IRBuilder<> &builder, - llvm::Intrinsic::ID intrinsic, - ArrayRef<llvm::Value *> args = {}) { - llvm::Module *module = builder.GetInsertBlock()->getModule(); - llvm::Function *fn = llvm::Intrinsic::getDeclaration(module, intrinsic); - return builder.CreateCall(fn, args); -} - -static llvm::Intrinsic::ID getShflBflyIntrinsicId(llvm::Type *resultType, - bool withPredicate) { - if (withPredicate) { - resultType = cast<llvm::StructType>(resultType)->getElementType(0); - return resultType->isFloatTy() ? llvm::Intrinsic::nvvm_shfl_sync_bfly_f32p - : llvm::Intrinsic::nvvm_shfl_sync_bfly_i32p; - } - return resultType->isFloatTy() ? llvm::Intrinsic::nvvm_shfl_sync_bfly_f32 - : llvm::Intrinsic::nvvm_shfl_sync_bfly_i32; -} - -namespace { -class ModuleTranslation : public LLVM::ModuleTranslation { -public: - using LLVM::ModuleTranslation::ModuleTranslation; - -protected: - LogicalResult convertOperation(Operation &opInst, - llvm::IRBuilder<> &builder) override { - -#include "mlir/Dialect/LLVMIR/NVVMConversions.inc" - - return LLVM::ModuleTranslation::convertOperation(opInst, builder); - } - - /// Allow access to the constructor. - friend LLVM::ModuleTranslation; -}; -} // namespace - -std::unique_ptr<llvm::Module> -mlir::translateModuleToNVVMIR(Operation *m, llvm::LLVMContext &llvmContext, - StringRef name) { - // Register the translation to LLVM IR if nobody else did before. This may - // happen if this translation is called inside a pass pipeline that converts - // GPU dialects to binary blobs without translating the rest of the code. - registerLLVMDialectTranslation(*m->getContext()); - - auto llvmModule = LLVM::ModuleTranslation::translateModule<ModuleTranslation>( - m, llvmContext, name); - if (!llvmModule) - return llvmModule; - - // Insert the nvvm.annotations kernel so that the NVVM backend recognizes the - // function as a kernel. - for (auto func : - ModuleTranslation::getModuleBody(m).getOps<LLVM::LLVMFuncOp>()) { - if (!func->getAttrOfType<UnitAttr>( - NVVM::NVVMDialect::getKernelFuncAttrName())) - continue; - - auto *llvmFunc = llvmModule->getFunction(func.getName()); - - llvm::Metadata *llvmMetadata[] = { - llvm::ValueAsMetadata::get(llvmFunc), - llvm::MDString::get(llvmModule->getContext(), "kernel"), - llvm::ValueAsMetadata::get(llvm::ConstantInt::get( - llvm::Type::getInt32Ty(llvmModule->getContext()), 1))}; - llvm::MDNode *llvmMetadataNode = - llvm::MDNode::get(llvmModule->getContext(), llvmMetadata); - llvmModule->getOrInsertNamedMetadata("nvvm.annotations") - ->addOperand(llvmMetadataNode); - } - - return llvmModule; -} - -namespace mlir { -void registerToNVVMIRTranslation() { - TranslateFromMLIRRegistration registration( - "mlir-to-nvvmir", - [](ModuleOp module, raw_ostream &output) { - llvm::LLVMContext llvmContext; - auto llvmModule = mlir::translateModuleToNVVMIR(module, llvmContext); - if (!llvmModule) </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_apm/llvm-master-aarch64-spec2k6-Oz - Build # 8 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *gcc* in CI configuration tcwg_bmk_llvm_apm/llvm-master-aarch64-spec2k6-Oz. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_apm/llvm-master-aarch64-spec2k6-Oz Culprit: <cut> commit 844105d912a4337635001fc2077404aefb90c4c6 Author: GCC Administrator <gccadmin(a)gcc.gnu.org> Date: Mon Aug 9 00:16:32 2021 +0000 Daily bump. </cut> Results regressed to (for first_bad == 844105d912a4337635001fc2077404aefb90c4c6) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -Oz artifacts/build-844105d912a4337635001fc2077404aefb90c4c6/results_id: 1 # 482.sphinx3,[.] OUTLINED_FUNCTION_4 regressed by 117 from (for last_good == 5f564fd013327977a4d04aa520237d17f641f01a) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -Oz artifacts/build-5f564fd013327977a4d04aa520237d17f641f01a/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Results ID of last_good: apm_64/tcwg_bmk_llvm_apm/bisect-llvm-master-aarch64-spec2k6-Oz/4114 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Results ID of first_bad: apm_64/tcwg_bmk_llvm_apm/bisect-llvm-master-aarch64-spec2k6-Oz/4109 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Configuration details: Reproduce builds: <cut> mkdir investigate-gcc-844105d912a4337635001fc2077404aefb90c4c6 cd investigate-gcc-844105d912a4337635001fc2077404aefb90c4c6 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /gcc/ ./ ./bisect/baseline/ cd gcc # Reproduce first_bad build git checkout --detach 844105d912a4337635001fc2077404aefb90c4c6 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 5f564fd013327977a4d04aa520237d17f641f01a ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Full commit (up to 1000 lines): <cut> commit 844105d912a4337635001fc2077404aefb90c4c6 Author: GCC Administrator <gccadmin(a)gcc.gnu.org> Date: Mon Aug 9 00:16:32 2021 +0000 Daily bump. --- gcc/ChangeLog | 4 ++++ gcc/DATESTAMP | 2 +- gcc/testsuite/ChangeLog | 4 ++++ libstdc++-v3/ChangeLog | 18 ++++++++++++++++++ 4 files changed, 27 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3b0d1b06e9c..9d39f0f0064 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,7 @@ +2021-08-08 Sergei Trofimovich <siarheit(a)google.com> + + * lra-constraints.c: Fix s/otput/output/ typo. + 2021-08-06 Martin Sebor <msebor(a)redhat.com> * builtins.c (expand_builtin_memchr): Move to gimple-ssa-warn-access.cc. diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 3b0c65fa4e8..859da5af87c 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20210808 +20210809 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index fa9298d30df..034fc300a63 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2021-08-08 Jeff Law <jlaw(a)localhost.localdomain> + + * gcc.target/tic6x/rotdi16-scan.c: Pull rotate into its own function. + 2021-08-07 H.J. Lu <hjl.tools(a)gmail.com> PR tree-optimization/88531 diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index da75afb8ebc..70fb007885a 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,21 @@ +2021-08-08 François Dumont <fdumont(a)gcc.gnu.org> + + * testsuite/25_algorithms/copy/debug/constexpr_neg.cc: Replace 'failed_assertion' + dg-prune-output reason with 'builtin_unreachable'. + * testsuite/25_algorithms/copy_backward/debug/constexpr_neg.cc: Likewise. + * testsuite/25_algorithms/equal/debug/constexpr_neg.cc: Likewise. + * testsuite/25_algorithms/lower_bound/debug/constexpr_partitioned_neg.cc: Likewise. + * testsuite/25_algorithms/lower_bound/debug/constexpr_partitioned_pred_neg.cc: Likewise. + * testsuite/25_algorithms/lower_bound/debug/constexpr_valid_range_neg.cc: Likewise. + * testsuite/25_algorithms/upper_bound/debug/constexpr_partitioned_neg.cc: Likewise. + * testsuite/25_algorithms/upper_bound/debug/constexpr_partitioned_pred_neg.cc: Likewise. + * testsuite/25_algorithms/upper_bound/debug/constexpr_valid_range_neg.cc: Likewise. + +2021-08-08 Hans-Peter Nilsson <hp(a)bitrange.com> + + * testsuite/std/ranges/iota/max_size_type.cc: Set + dg-timeout-factor to 4. + 2021-08-06 Jonathan Wakely <jwakely(a)redhat.com> * libsupc++/compare (compare_three_way, strong_order) </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tx1/llvm-release-aarch64-spec2k6-O2_LTO - Build # 6 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tx1/llvm-release-aarch64-spec2k6-O2_LTO. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tx1/llvm-release-aarch64-spec2k6-O2_LTO Culprit: <cut> commit f8ed31cd991bf84372369409595707f6e3cebbce Author: Jonny Farley <jonny.farley(a)sony.com> Date: Tue Feb 16 16:34:35 2021 +0000 [Fuzzer][Test] Use %python substitution for trace-malloc-unbalanced.test This test was found to fail for some of our downstream builds, on computers where python was not on the default $PATH. Therefore add a %python substitution to use sys.executable, based on similar solutions for python calls in tests elsewhere in LLVM. Differential Revision: https://reviews.llvm.org/D96799 </cut> Results regressed to (for first_bad == f8ed31cd991bf84372369409595707f6e3cebbce) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2_LTO artifacts/build-f8ed31cd991bf84372369409595707f6e3cebbce/results_id: 1 # 400.perlbench,perlbench_base.default regressed by 103 from (for last_good == cb2876800cc827431f4143c4fc5595c6c1191269) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2_LTO artifacts/build-cb2876800cc827431f4143c4fc5595c6c1191269/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Results ID of last_good: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-release-aarch64-spec2k6-O2_LTO/4095 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Results ID of first_bad: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-release-aarch64-spec2k6-O2_LTO/4070 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-f8ed31cd991bf84372369409595707f6e3cebbce cd investigate-llvm-f8ed31cd991bf84372369409595707f6e3cebbce git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach f8ed31cd991bf84372369409595707f6e3cebbce ../artifacts/test.sh # Reproduce last_good build git checkout --detach cb2876800cc827431f4143c4fc5595c6c1191269 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Full commit (up to 1000 lines): <cut> commit f8ed31cd991bf84372369409595707f6e3cebbce Author: Jonny Farley <jonny.farley(a)sony.com> Date: Tue Feb 16 16:34:35 2021 +0000 [Fuzzer][Test] Use %python substitution for trace-malloc-unbalanced.test This test was found to fail for some of our downstream builds, on computers where python was not on the default $PATH. Therefore add a %python substitution to use sys.executable, based on similar solutions for python calls in tests elsewhere in LLVM. Differential Revision: https://reviews.llvm.org/D96799 --- compiler-rt/test/fuzzer/lit.cfg.py | 2 ++ compiler-rt/test/fuzzer/trace-malloc-unbalanced.test | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/compiler-rt/test/fuzzer/lit.cfg.py b/compiler-rt/test/fuzzer/lit.cfg.py index 00871b8a94a3..d114ae7937fa 100644 --- a/compiler-rt/test/fuzzer/lit.cfg.py +++ b/compiler-rt/test/fuzzer/lit.cfg.py @@ -61,6 +61,8 @@ config.substitutions.append(('%build_dir', config.cmake_binary_dir)) libfuzzer_src_root = os.path.join(config.compiler_rt_src_root, "lib", "fuzzer") config.substitutions.append(('%libfuzzer_src', libfuzzer_src_root)) +config.substitutions.append(('%python', '"%s"' % (sys.executable))) + def generate_compiler_cmd(is_cpp=True, fuzzer_enabled=True, msan_enabled=False): compiler_cmd = config.clang extra_cmd = config.target_flags diff --git a/compiler-rt/test/fuzzer/trace-malloc-unbalanced.test b/compiler-rt/test/fuzzer/trace-malloc-unbalanced.test index c7b4632140cb..395386ab5cd2 100644 --- a/compiler-rt/test/fuzzer/trace-malloc-unbalanced.test +++ b/compiler-rt/test/fuzzer/trace-malloc-unbalanced.test @@ -8,10 +8,10 @@ RUN: %cpp_compiler %S/TraceMallocTest.cpp -o %t-TraceMallocTest # Specify python because we can't use the shebang line on Windows. RUN: %run %t-TraceMallocTest -seed=1 -trace_malloc=1 -runs=200 2>&1 | \ -RUN: python %libfuzzer_src/scripts/unbalanced_allocs.py --skip=5 | FileCheck %s +RUN: %python %libfuzzer_src/scripts/unbalanced_allocs.py --skip=5 | FileCheck %s RUN: %run %t-TraceMallocTest -seed=1 -trace_malloc=2 -runs=200 2>&1 | \ -RUN: python %libfuzzer_src/scripts/unbalanced_allocs.py --skip=5 | FileCheck %s --check-prefixes=CHECK,CHECK2 +RUN: %python %libfuzzer_src/scripts/unbalanced_allocs.py --skip=5 | FileCheck %s --check-prefixes=CHECK,CHECK2 CHECK: MallocFreeTracer: START # Behavior of the format string "%p" is implementation defined. Account for the </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tk1/llvm-release-arm-spec2k6-O3_LTO - Build # 7 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O3_LTO. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O3_LTO Culprit: <cut> commit f343a730596b6b02039a91d71dc16c113d09cfe6 Author: Vitaly Buka <vitalybuka(a)google.com> Date: Fri Apr 2 00:17:45 2021 -0700 [NFC][scudo] Convert ScudoPrimaryTest into TYPED_TEST </cut> Results regressed to (for first_bad == f343a730596b6b02039a91d71dc16c113d09cfe6) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_LTO_marm artifacts/build-f343a730596b6b02039a91d71dc16c113d09cfe6/results_id: 1 # 462.libquantum,libquantum_base.default regressed by 104 from (for last_good == 28ea218417d713bcb399e9428e4c3f8f7960feb2) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_LTO_marm artifacts/build-28ea218417d713bcb399e9428e4c3f8f7960feb2/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of last_good: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O3_LTO/4078 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of first_bad: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O3_LTO/4076 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-f343a730596b6b02039a91d71dc16c113d09cfe6 cd investigate-llvm-f343a730596b6b02039a91d71dc16c113d09cfe6 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach f343a730596b6b02039a91d71dc16c113d09cfe6 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 28ea218417d713bcb399e9428e4c3f8f7960feb2 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Full commit (up to 1000 lines): <cut> commit f343a730596b6b02039a91d71dc16c113d09cfe6 Author: Vitaly Buka <vitalybuka(a)google.com> Date: Fri Apr 2 00:17:45 2021 -0700 [NFC][scudo] Convert ScudoPrimaryTest into TYPED_TEST --- .../lib/scudo/standalone/tests/primary_test.cpp | 73 +++++++++++++--------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp index 38bf67150853..07f3d6b77c17 100644 --- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp @@ -52,8 +52,7 @@ template <typename Primary> static void testPrimary() { Str.output(); } -template <typename SizeClassMapT> struct TestConfig1 { - using SizeClassMap = SizeClassMapT; +struct TestConfig1 { static const scudo::uptr PrimaryRegionSizeLog = 18U; static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; @@ -62,8 +61,7 @@ template <typename SizeClassMapT> struct TestConfig1 { static const scudo::uptr PrimaryCompactPtrScale = 0; }; -template <typename SizeClassMapT> struct TestConfig2 { - using SizeClassMap = SizeClassMapT; +struct TestConfig2 { static const scudo::uptr PrimaryRegionSizeLog = 24U; static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; @@ -72,8 +70,7 @@ template <typename SizeClassMapT> struct TestConfig2 { static const scudo::uptr PrimaryCompactPtrScale = 0; }; -template <typename SizeClassMapT> struct TestConfig3 { - using SizeClassMap = SizeClassMapT; +struct TestConfig3 { static const scudo::uptr PrimaryRegionSizeLog = 24U; static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; @@ -82,13 +79,43 @@ template <typename SizeClassMapT> struct TestConfig3 { static const scudo::uptr PrimaryCompactPtrScale = 0; }; -TEST(ScudoPrimaryTest, BasicPrimary) { - using SizeClassMap = scudo::DefaultSizeClassMap; +template <typename BaseConfig, typename SizeClassMapT> +struct Config : public BaseConfig { + using SizeClassMap = SizeClassMapT; +}; + +template <typename BaseConfig, typename SizeClassMapT> struct MakeAllocator { + using Value = scudo::SizeClassAllocator64<Config<BaseConfig, SizeClassMapT>>; +}; + +template <typename SizeClassMapT> +struct MakeAllocator<TestConfig1, SizeClassMapT> { + using Value = scudo::SizeClassAllocator32<Config<TestConfig1, SizeClassMapT>>; +}; + +namespace testing { +namespace internal { +#define SCUDO_DEFINE_GTEST_TYPE_NAME(TYPE) \ + template <> std::string GetTypeName<TYPE>() { return #TYPE; } +SCUDO_DEFINE_GTEST_TYPE_NAME(TestConfig1) +SCUDO_DEFINE_GTEST_TYPE_NAME(TestConfig2) +SCUDO_DEFINE_GTEST_TYPE_NAME(TestConfig3) +#undef SCUDO_DEFINE_GTEST_TYPE_NAME +} // namespace internal +} // namespace testing + +template <class BaseConfig> struct ScudoPrimaryTest : public ::testing::Test {}; + +using ScudoPrimaryTestTypes = testing::Types< #if !SCUDO_FUCHSIA - testPrimary<scudo::SizeClassAllocator32<TestConfig1<SizeClassMap>>>(); + TestConfig1, #endif - testPrimary<scudo::SizeClassAllocator64<TestConfig2<SizeClassMap>>>(); - testPrimary<scudo::SizeClassAllocator64<TestConfig3<SizeClassMap>>>(); + TestConfig2, TestConfig3>; +TYPED_TEST_CASE(ScudoPrimaryTest, ScudoPrimaryTestTypes); + +TYPED_TEST(ScudoPrimaryTest, BasicPrimary) { + using SizeClassMap = scudo::DefaultSizeClassMap; + testPrimary<typename MakeAllocator<TypeParam, SizeClassMap>::Value>(); } struct SmallRegionsConfig { @@ -178,13 +205,9 @@ template <typename Primary> static void testIteratePrimary() { Str.output(); } -TEST(ScudoPrimaryTest, PrimaryIterate) { +TYPED_TEST(ScudoPrimaryTest, PrimaryIterate) { using SizeClassMap = scudo::DefaultSizeClassMap; -#if !SCUDO_FUCHSIA - testIteratePrimary<scudo::SizeClassAllocator32<TestConfig1<SizeClassMap>>>(); -#endif - testIteratePrimary<scudo::SizeClassAllocator64<TestConfig2<SizeClassMap>>>(); - testIteratePrimary<scudo::SizeClassAllocator64<TestConfig3<SizeClassMap>>>(); + testIteratePrimary<typename MakeAllocator<TypeParam, SizeClassMap>::Value>(); } static std::mutex Mutex; @@ -239,13 +262,9 @@ template <typename Primary> static void testPrimaryThreaded() { Str.output(); } -TEST(ScudoPrimaryTest, PrimaryThreaded) { +TYPED_TEST(ScudoPrimaryTest, PrimaryThreaded) { using SizeClassMap = scudo::SvelteSizeClassMap; -#if !SCUDO_FUCHSIA - testPrimaryThreaded<scudo::SizeClassAllocator32<TestConfig1<SizeClassMap>>>(); -#endif - testPrimaryThreaded<scudo::SizeClassAllocator64<TestConfig2<SizeClassMap>>>(); - testPrimaryThreaded<scudo::SizeClassAllocator64<TestConfig3<SizeClassMap>>>(); + testPrimaryThreaded<typename MakeAllocator<TypeParam, SizeClassMap>::Value>(); } // Through a simple allocation that spans two pages, verify that releaseToOS @@ -270,11 +289,7 @@ template <typename Primary> static void testReleaseToOS() { EXPECT_GT(Allocator->releaseToOS(), 0U); } -TEST(ScudoPrimaryTest, ReleaseToOS) { +TYPED_TEST(ScudoPrimaryTest, ReleaseToOS) { using SizeClassMap = scudo::DefaultSizeClassMap; -#if !SCUDO_FUCHSIA - testReleaseToOS<scudo::SizeClassAllocator32<TestConfig1<SizeClassMap>>>(); -#endif - testReleaseToOS<scudo::SizeClassAllocator64<TestConfig2<SizeClassMap>>>(); - testReleaseToOS<scudo::SizeClassAllocator64<TestConfig3<SizeClassMap>>>(); + testReleaseToOS<typename MakeAllocator<TypeParam, SizeClassMap>::Value>(); } </cut>

3 years, 10 months

1
0
0 0

[ACTIVITY] report week ending 20 Aug

by Peter Maydell

Progress: * UM-2 [QEMU upstream maintainership] + We needed an rc4 (as usual) + Tried to work through some of my code review patchlog, notably some big alignment-related series from RTH + Sent patchseries for some small things: + implement last few bits of HSTR trap-to-hypervisor functionality + actually take an exception if PSTATE.IL gets set + don't assert if user asks for both an EL3 guest CPU and KVM * QEMU-406 [QEMU support for MVE (M-profile Vector Extension; Helium)] + worked through rth's code review comments for fp insn patches these are now ready to send out once QEMU makes its 6.1 release and the previous slice of reviewed patches can get into the tree -- PMM

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tk1/llvm-release-arm-spec2k6-O3 - Build # 10 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O3. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O3 Culprit: <cut> commit a838a4f69f500fc8e39fb4c9a1476f162ccf8423 Author: David Green <david.green(a)arm.com> Date: Mon Feb 15 13:17:21 2021 +0000 [ARM] Extend search for increment in load/store optimizer Currently the findIncDecAfter will only look at the next instruction for post-inc candidates in the load/store optimizer. This extends that to a search through the current BB, until an instruction that modifies or uses the increment reg is found. This allows more post-inc load/stores and ldm/stm's to be created, especially in cases where a schedule might move instructions further apart. We make sure not to look any further for an SP, as that might invalidate stack slots that are still in use. Differential Revision: https://reviews.llvm.org/D95881 </cut> Results regressed to (for first_bad == a838a4f69f500fc8e39fb4c9a1476f162ccf8423) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_marm artifacts/build-a838a4f69f500fc8e39fb4c9a1476f162ccf8423/results_id: 1 # 482.sphinx3,sphinx_livepretend_base.default regressed by 104 # 482.sphinx3,[.] vector_gautbl_eval_logs3 regressed by 115 from (for last_good == 20e3a6cb6270b68139f74529ab8efdfad1263533) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_marm artifacts/build-20e3a6cb6270b68139f74529ab8efdfad1263533/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of last_good: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O3/4025 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of first_bad: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O3/4022 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-a838a4f69f500fc8e39fb4c9a1476f162ccf8423 cd investigate-llvm-a838a4f69f500fc8e39fb4c9a1476f162ccf8423 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach a838a4f69f500fc8e39fb4c9a1476f162ccf8423 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 20e3a6cb6270b68139f74529ab8efdfad1263533 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Full commit (up to 1000 lines): <cut> commit a838a4f69f500fc8e39fb4c9a1476f162ccf8423 Author: David Green <david.green(a)arm.com> Date: Mon Feb 15 13:17:21 2021 +0000 [ARM] Extend search for increment in load/store optimizer Currently the findIncDecAfter will only look at the next instruction for post-inc candidates in the load/store optimizer. This extends that to a search through the current BB, until an instruction that modifies or uses the increment reg is found. This allows more post-inc load/stores and ldm/stm's to be created, especially in cases where a schedule might move instructions further apart. We make sure not to look any further for an SP, as that might invalidate stack slots that are still in use. Differential Revision: https://reviews.llvm.org/D95881 --- llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 40 +++++++++++++------ llvm/test/CodeGen/ARM/indexed-mem.ll | 6 +-- .../Thumb2/LowOverheadLoops/fast-fp-loops.ll | 9 ++--- .../Thumb2/LowOverheadLoops/mve-float-loops.ll | 45 ++++++++-------------- llvm/test/CodeGen/Thumb2/mve-float32regloops.ll | 6 +-- llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll | 9 ++--- llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll | 9 ++--- llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll | 18 +++------ llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll | 6 +-- llvm/test/CodeGen/Thumb2/mve-vldshuffle.ll | 3 +- 10 files changed, 66 insertions(+), 85 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index aa1fe4e4ffda..5fe61809f31b 100644 --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1238,19 +1238,37 @@ findIncDecBefore(MachineBasicBlock::iterator MBBI, Register Reg, /// Searches for a increment or decrement of \p Reg after \p MBBI. static MachineBasicBlock::iterator findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg, - ARMCC::CondCodes Pred, Register PredReg, int &Offset) { + ARMCC::CondCodes Pred, Register PredReg, int &Offset, + const TargetRegisterInfo *TRI) { Offset = 0; MachineBasicBlock &MBB = *MBBI->getParent(); MachineBasicBlock::iterator EndMBBI = MBB.end(); MachineBasicBlock::iterator NextMBBI = std::next(MBBI); - // Skip debug values. - while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr()) - ++NextMBBI; - if (NextMBBI == EndMBBI) - return EndMBBI; + while (NextMBBI != EndMBBI) { + // Skip debug values. + while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr()) + ++NextMBBI; + if (NextMBBI == EndMBBI) + return EndMBBI; + + unsigned Off = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg); + if (Off) { + Offset = Off; + return NextMBBI; + } - Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg); - return Offset == 0 ? EndMBBI : NextMBBI; + // SP can only be combined if it is the next instruction after the original + // MBBI, otherwise we may be incrementing the stack pointer (invalidating + // anything below the new pointer) when its frame elements are still in + // use. Other registers can attempt to look further, until a different use + // or def of the register is found. + if (Reg == ARM::SP || NextMBBI->readsRegister(Reg, TRI) || + NextMBBI->definesRegister(Reg, TRI)) + return EndMBBI; + + ++NextMBBI; + } + return EndMBBI; } /// Fold proceeding/trailing inc/dec of base register into the @@ -1296,7 +1314,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { } else if (Mode == ARM_AM::ib && Offset == -Bytes) { Mode = ARM_AM::da; } else { - MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); + MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI); if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) && ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) { @@ -1483,7 +1501,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { } else if (Offset == -Bytes) { NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); } else { - MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); + MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI); if (Offset == Bytes) { NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add); } else if (!isAM5 && Offset == -Bytes) { @@ -1614,7 +1632,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { if (Offset == 8 || Offset == -8) { NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE; } else { - MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); + MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI); if (Offset == 8 || Offset == -8) { NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST; } else diff --git a/llvm/test/CodeGen/ARM/indexed-mem.ll b/llvm/test/CodeGen/ARM/indexed-mem.ll index a5f8409a50a2..295bb377d732 100644 --- a/llvm/test/CodeGen/ARM/indexed-mem.ll +++ b/llvm/test/CodeGen/ARM/indexed-mem.ll @@ -220,16 +220,14 @@ define i32* @pre_dec_ldrd(i32* %base) { define i32* @post_inc_ldrd(i32* %base, i32* %addr.3) { ; CHECK-V8M-LABEL: post_inc_ldrd: ; CHECK-V8M: @ %bb.0: -; CHECK-V8M-NEXT: ldrd r2, r3, [r0] -; CHECK-V8M-NEXT: adds r0, #8 +; CHECK-V8M-NEXT: ldrd r2, r3, [r0], #8 ; CHECK-V8M-NEXT: add r2, r3 ; CHECK-V8M-NEXT: str r2, [r1] ; CHECK-V8M-NEXT: bx lr ; ; CHECK-V8A-LABEL: post_inc_ldrd: ; CHECK-V8A: @ %bb.0: -; CHECK-V8A-NEXT: ldm r0, {r2, r3} -; CHECK-V8A-NEXT: add r0, r0, #8 +; CHECK-V8A-NEXT: ldm r0!, {r2, r3} ; CHECK-V8A-NEXT: add r2, r2, r3 ; CHECK-V8A-NEXT: str r2, [r1] ; CHECK-V8A-NEXT: bx lr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll index f8fb8476c322..8b27a9348418 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll @@ -82,13 +82,10 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur ; CHECK-NEXT: add.w r0, r0, r3, lsl #2 ; CHECK-NEXT: .LBB0_10: @ %for.body.epil ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldr s0, [r1] -; CHECK-NEXT: adds r1, #4 -; CHECK-NEXT: vldr s2, [r2] -; CHECK-NEXT: adds r2, #4 +; CHECK-NEXT: vldmia r1!, {s0} +; CHECK-NEXT: vldmia r2!, {s2} ; CHECK-NEXT: vmul.f32 s0, s2, s0 -; CHECK-NEXT: vstr s0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstmia r0!, {s0} ; CHECK-NEXT: le lr, .LBB0_10 ; CHECK-NEXT: .LBB0_11: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, r5, r6, r7, pc} diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll index f962458ddb11..d143976927b2 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll @@ -43,14 +43,11 @@ define arm_aapcs_vfpcc void @float_float_mul(float* nocapture readonly %a, float ; CHECK-NEXT: add.w r7, r2, r12, lsl #2 ; CHECK-NEXT: .LBB0_6: @ %for.body.prol ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldr s0, [r6] -; CHECK-NEXT: adds r6, #4 -; CHECK-NEXT: vldr s2, [r5] -; CHECK-NEXT: adds r5, #4 +; CHECK-NEXT: vldmia r6!, {s0} ; CHECK-NEXT: add.w r12, r12, #1 +; CHECK-NEXT: vldmia r5!, {s2} ; CHECK-NEXT: vmul.f32 s0, s2, s0 -; CHECK-NEXT: vstr s0, [r7] -; CHECK-NEXT: adds r7, #4 +; CHECK-NEXT: vstmia r7!, {s0} ; CHECK-NEXT: le lr, .LBB0_6 ; CHECK-NEXT: .LBB0_7: @ %for.body.prol.loopexit ; CHECK-NEXT: cmp r4, #3 @@ -261,14 +258,11 @@ define arm_aapcs_vfpcc void @float_float_add(float* nocapture readonly %a, float ; CHECK-NEXT: add.w r7, r2, r12, lsl #2 ; CHECK-NEXT: .LBB1_6: @ %for.body.prol ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldr s0, [r6] -; CHECK-NEXT: adds r6, #4 -; CHECK-NEXT: vldr s2, [r5] -; CHECK-NEXT: adds r5, #4 +; CHECK-NEXT: vldmia r6!, {s0} ; CHECK-NEXT: add.w r12, r12, #1 +; CHECK-NEXT: vldmia r5!, {s2} ; CHECK-NEXT: vadd.f32 s0, s2, s0 -; CHECK-NEXT: vstr s0, [r7] -; CHECK-NEXT: adds r7, #4 +; CHECK-NEXT: vstmia r7!, {s0} ; CHECK-NEXT: le lr, .LBB1_6 ; CHECK-NEXT: .LBB1_7: @ %for.body.prol.loopexit ; CHECK-NEXT: cmp r4, #3 @@ -479,14 +473,11 @@ define arm_aapcs_vfpcc void @float_float_sub(float* nocapture readonly %a, float ; CHECK-NEXT: add.w r7, r2, r12, lsl #2 ; CHECK-NEXT: .LBB2_6: @ %for.body.prol ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldr s0, [r6] -; CHECK-NEXT: adds r6, #4 -; CHECK-NEXT: vldr s2, [r5] -; CHECK-NEXT: adds r5, #4 +; CHECK-NEXT: vldmia r6!, {s0} ; CHECK-NEXT: add.w r12, r12, #1 +; CHECK-NEXT: vldmia r5!, {s2} ; CHECK-NEXT: vsub.f32 s0, s2, s0 -; CHECK-NEXT: vstr s0, [r7] -; CHECK-NEXT: adds r7, #4 +; CHECK-NEXT: vstmia r7!, {s0} ; CHECK-NEXT: le lr, .LBB2_6 ; CHECK-NEXT: .LBB2_7: @ %for.body.prol.loopexit ; CHECK-NEXT: cmp r4, #3 @@ -706,13 +697,11 @@ define arm_aapcs_vfpcc void @float_int_mul(float* nocapture readonly %a, i32* no ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r4, [r6], #4 ; CHECK-NEXT: add.w r12, r12, #1 -; CHECK-NEXT: vldr s2, [r5] -; CHECK-NEXT: adds r5, #4 +; CHECK-NEXT: vldmia r5!, {s2} ; CHECK-NEXT: vmov s0, r4 ; CHECK-NEXT: vcvt.f32.s32 s0, s0 ; CHECK-NEXT: vmul.f32 s0, s2, s0 -; CHECK-NEXT: vstr s0, [r7] -; CHECK-NEXT: adds r7, #4 +; CHECK-NEXT: vstmia r7!, {s0} ; CHECK-NEXT: le lr, .LBB3_9 ; CHECK-NEXT: .LBB3_10: @ %for.body.prol.loopexit ; CHECK-NEXT: cmp.w r8, #3 @@ -1025,8 +1014,7 @@ define arm_aapcs_vfpcc void @half_half_mul(half* nocapture readonly %a, half* no ; CHECK-NEXT: adds r1, #2 ; CHECK-NEXT: vmul.f16 s0, s2, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0 -; CHECK-NEXT: vstr s0, [r2] -; CHECK-NEXT: adds r2, #4 +; CHECK-NEXT: vstmia r2!, {s0} ; CHECK-NEXT: le lr, .LBB5_7 ; CHECK-NEXT: .LBB5_8: @ %for.cond.cleanup ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} @@ -1140,8 +1128,7 @@ define arm_aapcs_vfpcc void @half_half_add(half* nocapture readonly %a, half* no ; CHECK-NEXT: adds r1, #2 ; CHECK-NEXT: vadd.f16 s0, s2, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0 -; CHECK-NEXT: vstr s0, [r2] -; CHECK-NEXT: adds r2, #4 +; CHECK-NEXT: vstmia r2!, {s0} ; CHECK-NEXT: le lr, .LBB6_7 ; CHECK-NEXT: .LBB6_8: @ %for.cond.cleanup ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} @@ -1255,8 +1242,7 @@ define arm_aapcs_vfpcc void @half_half_sub(half* nocapture readonly %a, half* no ; CHECK-NEXT: adds r1, #2 ; CHECK-NEXT: vsub.f16 s0, s2, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0 -; CHECK-NEXT: vstr s0, [r2] -; CHECK-NEXT: adds r2, #4 +; CHECK-NEXT: vstmia r2!, {s0} ; CHECK-NEXT: le lr, .LBB7_7 ; CHECK-NEXT: .LBB7_8: @ %for.cond.cleanup ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} @@ -1376,8 +1362,7 @@ define arm_aapcs_vfpcc void @half_short_mul(half* nocapture readonly %a, i16* no ; CHECK-NEXT: vcvt.f16.s32 s2, s2 ; CHECK-NEXT: vmul.f16 s0, s0, s2 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0 -; CHECK-NEXT: vstr s0, [r2] -; CHECK-NEXT: adds r2, #4 +; CHECK-NEXT: vstmia r2!, {s0} ; CHECK-NEXT: le lr, .LBB8_7 ; CHECK-NEXT: .LBB8_8: @ %for.cond.cleanup ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll index 0156cfe25f8e..7e4603e4b4c6 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -1442,8 +1442,7 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_stereo_df2T_f32(%struct.arm_biqu ; CHECK-NEXT: adds r1, #8 ; CHECK-NEXT: vfma.f32 q5, q4, r5 ; CHECK-NEXT: vfma.f32 q3, q5, q2 -; CHECK-NEXT: vstmia r7, {s20, s21} -; CHECK-NEXT: adds r7, #8 +; CHECK-NEXT: vstmia r7!, {s20, s21} ; CHECK-NEXT: vfma.f32 q3, q4, q1 ; CHECK-NEXT: vstrw.32 q3, [r4] ; CHECK-NEXT: le lr, .LBB17_3 @@ -2069,8 +2068,7 @@ define void @arm_biquad_cascade_df2T_f32(%struct.arm_biquad_cascade_df2T_instanc ; CHECK-NEXT: .LBB20_5: @ %while.body ; CHECK-NEXT: @ Parent Loop BB20_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: ldrd r7, r4, [r1] -; CHECK-NEXT: adds r1, #8 +; CHECK-NEXT: ldrd r7, r4, [r1], #8 ; CHECK-NEXT: vfma.f32 q6, q3, r7 ; CHECK-NEXT: vmov r7, s24 ; CHECK-NEXT: vmov q1, q6 diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll index b34896e32859..0eb1226f60db 100644 --- a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll @@ -309,14 +309,11 @@ define void @fma8(float* noalias nocapture readonly %A, float* noalias nocapture ; CHECK-NEXT: add.w r2, r2, r12, lsl #2 ; CHECK-NEXT: .LBB2_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldr s0, [r0] -; CHECK-NEXT: adds r0, #4 -; CHECK-NEXT: vldr s2, [r1] -; CHECK-NEXT: adds r1, #4 +; CHECK-NEXT: vldmia r0!, {s0} +; CHECK-NEXT: vldmia r1!, {s2} ; CHECK-NEXT: vldr s4, [r2] ; CHECK-NEXT: vfma.f32 s4, s2, s0 -; CHECK-NEXT: vstr s4, [r2] -; CHECK-NEXT: adds r2, #4 +; CHECK-NEXT: vstmia r2!, {s4} ; CHECK-NEXT: le lr, .LBB2_7 ; CHECK-NEXT: .LBB2_8: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, r5, r6, pc} diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll index 070d9b744836..1b6cdfc517be 100644 --- a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll @@ -44,14 +44,11 @@ define void @fma(float* noalias nocapture readonly %A, float* noalias nocapture ; CHECK-NEXT: add.w r2, r2, r12, lsl #2 ; CHECK-NEXT: .LBB0_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldr s0, [r0] -; CHECK-NEXT: adds r0, #4 -; CHECK-NEXT: vldr s2, [r1] -; CHECK-NEXT: adds r1, #4 +; CHECK-NEXT: vldmia r0!, {s0} +; CHECK-NEXT: vldmia r1!, {s2} ; CHECK-NEXT: vldr s4, [r2] ; CHECK-NEXT: vfma.f32 s4, s2, s0 -; CHECK-NEXT: vstr s4, [r2] -; CHECK-NEXT: adds r2, #4 +; CHECK-NEXT: vstmia r2!, {s4} ; CHECK-NEXT: le lr, .LBB0_7 ; CHECK-NEXT: .LBB0_8: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, r5, r6, pc} diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll index 93a1535a42fe..f69eeb773a9f 100644 --- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -38,12 +38,10 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vmvn.i32 q1, #0x80000000 ; CHECK-NEXT: .LBB0_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrd r5, r4, [r0] +; CHECK-NEXT: ldrd r5, r4, [r0], #8 ; CHECK-NEXT: mov.w r3, #-1 -; CHECK-NEXT: ldrd r8, r7, [r1] -; CHECK-NEXT: adds r0, #8 +; CHECK-NEXT: ldrd r8, r7, [r1], #8 ; CHECK-NEXT: smull r4, r7, r7, r4 -; CHECK-NEXT: adds r1, #8 ; CHECK-NEXT: asrl r4, r7, #31 ; CHECK-NEXT: smull r6, r5, r8, r5 ; CHECK-NEXT: rsbs.w r9, r4, #-2147483648 @@ -95,8 +93,7 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vorr q2, q2, q4 ; CHECK-NEXT: vmov r3, s10 ; CHECK-NEXT: vmov r4, s8 -; CHECK-NEXT: strd r4, r3, [r2] -; CHECK-NEXT: adds r2, #8 +; CHECK-NEXT: strd r4, r3, [r2], #8 ; CHECK-NEXT: le lr, .LBB0_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: ldrd r7, r3, [sp] @ 8-byte Folded Reload @@ -744,10 +741,8 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: add.w r12, r0, r5, lsl #2 ; CHECK-NEXT: .LBB3_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrd r4, r7, [r0] -; CHECK-NEXT: adds r0, #8 -; CHECK-NEXT: ldrd r5, r10, [r1] -; CHECK-NEXT: adds r1, #8 +; CHECK-NEXT: ldrd r4, r7, [r0], #8 +; CHECK-NEXT: ldrd r5, r10, [r1], #8 ; CHECK-NEXT: umull r4, r5, r5, r4 ; CHECK-NEXT: lsrl r4, r5, #31 ; CHECK-NEXT: subs.w r6, r4, #-1 @@ -773,8 +768,7 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vorn q0, q1, q0 ; CHECK-NEXT: vmov r4, s2 ; CHECK-NEXT: vmov r5, s0 -; CHECK-NEXT: strd r5, r4, [r2] -; CHECK-NEXT: adds r2, #8 +; CHECK-NEXT: strd r5, r4, [r2], #8 ; CHECK-NEXT: le lr, .LBB3_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: ldr r7, [sp] @ 4-byte Reload diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll index 803f20571672..4393e4646bab 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll @@ -521,8 +521,7 @@ define float @fadd_f32(float* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: add.w r0, r0, r2, lsl #2 ; CHECK-NEXT: .LBB5_8: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldr s2, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldmia r0!, {s2} ; CHECK-NEXT: vadd.f32 s0, s2, s0 ; CHECK-NEXT: le lr, .LBB5_8 ; CHECK-NEXT: .LBB5_9: @ %for.cond.cleanup @@ -620,8 +619,7 @@ define float @fmul_f32(float* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: add.w r0, r0, r2, lsl #2 ; CHECK-NEXT: .LBB6_8: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldr s2, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldmia r0!, {s2} ; CHECK-NEXT: vmul.f32 s0, s2, s0 ; CHECK-NEXT: le lr, .LBB6_8 ; CHECK-NEXT: .LBB6_9: @ %for.cond.cleanup diff --git a/llvm/test/CodeGen/Thumb2/mve-vldshuffle.ll b/llvm/test/CodeGen/Thumb2/mve-vldshuffle.ll index 80e65f1ee855..d26757fc99e8 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vldshuffle.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vldshuffle.ll @@ -176,8 +176,7 @@ define void @arm_cmplx_mag_squared_f32(float* nocapture readonly %pSrc, float* n ; CHECK-NEXT: adds r3, #8 ; CHECK-NEXT: vmul.f32 s0, s0, s0 ; CHECK-NEXT: vfma.f32 s0, s2, s2 -; CHECK-NEXT: vstr s0, [r12] -; CHECK-NEXT: add.w r12, r12, #4 +; CHECK-NEXT: vstmia r12!, {s0} ; CHECK-NEXT: le lr, .LBB1_7 ; CHECK-NEXT: .LBB1_8: @ %while.end ; CHECK-NEXT: pop {r4, r5, r7, pc} </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_gcc_bootstrap/master-arm-bootstrap_debug - Build # 1 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *gcc* in CI configuration tcwg_gcc_bootstrap/master-arm-bootstrap_debug. So far, this commit has regressed CI configurations: - tcwg_gcc_bootstrap/master-arm-bootstrap_debug Culprit: <cut> commit d881460deb1f0bdfc3e8fa2d391a03a9763cbff4 Author: Harald Anlauf <anlauf(a)gmx.de> Date: Thu Aug 19 21:00:45 2021 +0200 Fortran - simplify length of substring with constant bounds gcc/fortran/ChangeLog: PR fortran/100950 * simplify.c (substring_has_constant_len): New. (gfc_simplify_len): Handle case of substrings with constant bounds. gcc/testsuite/ChangeLog: PR fortran/100950 * gfortran.dg/pr100950.f90: New test. </cut> Results regressed to (for first_bad == d881460deb1f0bdfc3e8fa2d391a03a9763cbff4) # reset_artifacts: -10 # true: 0 # build_abe binutils: 1 # First few build errors in logs: # 00:12:48 make[3]: [armv8l-unknown-linux-gnueabihf/bits/largefile-config.h] Error 1 (ignored) # 00:21:17 /home/tcwg-buildslave/workspace/tcwg_gnu_15/abe/snapshots/gcc.git~master/gcc/fortran/simplify.c:4557:22: error: unknown conversion type character ‘l’ in format [-Werror=format=] # 00:21:17 /home/tcwg-buildslave/workspace/tcwg_gnu_15/abe/snapshots/gcc.git~master/gcc/fortran/simplify.c:4557:22: error: format ‘%L’ expects argument of type ‘locus*’, but argument 2 has type ‘long long int’ [-Werror=format=] # 00:21:17 /home/tcwg-buildslave/workspace/tcwg_gnu_15/abe/snapshots/gcc.git~master/gcc/fortran/simplify.c:4557:22: error: too many arguments for format [-Werror=format-extra-args] # 00:21:17 /home/tcwg-buildslave/workspace/tcwg_gnu_15/abe/snapshots/gcc.git~master/gcc/fortran/simplify.c:4570:22: error: unknown conversion type character ‘l’ in format [-Werror=format=] # 00:21:17 /home/tcwg-buildslave/workspace/tcwg_gnu_15/abe/snapshots/gcc.git~master/gcc/fortran/simplify.c:4570:22: error: format ‘%L’ expects argument of type ‘locus*’, but argument 2 has type ‘long long int’ [-Werror=format=] # 00:21:17 /home/tcwg-buildslave/workspace/tcwg_gnu_15/abe/snapshots/gcc.git~master/gcc/fortran/simplify.c:4570:22: error: too many arguments for format [-Werror=format-extra-args] # 00:21:38 make[3]: *** [fortran/simplify.o] Error 1 # 00:28:40 make[2]: *** [all-stage2-gcc] Error 2 # 00:28:40 make[1]: *** [stage2-bubble] Error 2 from (for last_good == 77bf9f83b8e388de8bedb259991f588a7b8a7f57) # reset_artifacts: -10 # true: 0 # build_abe binutils: 1 # build_abe bootstrap_debug: 2 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-arm-bootstrap_de… Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-arm-bootstrap_de… Build top page/logs: https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-arm-bootstrap_de… Configuration details: Reproduce builds: <cut> mkdir investigate-gcc-d881460deb1f0bdfc3e8fa2d391a03a9763cbff4 cd investigate-gcc-d881460deb1f0bdfc3e8fa2d391a03a9763cbff4 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-arm-bootstrap_de… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-arm-bootstrap_de… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-arm-bootstrap_de… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_gnu-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /gcc/ ./ ./bisect/baseline/ cd gcc # Reproduce first_bad build git checkout --detach d881460deb1f0bdfc3e8fa2d391a03a9763cbff4 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 77bf9f83b8e388de8bedb259991f588a7b8a7f57 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-arm-bootstrap_de… Build log: https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-arm-bootstrap_de… Full commit (up to 1000 lines): <cut> commit d881460deb1f0bdfc3e8fa2d391a03a9763cbff4 Author: Harald Anlauf <anlauf(a)gmx.de> Date: Thu Aug 19 21:00:45 2021 +0200 Fortran - simplify length of substring with constant bounds gcc/fortran/ChangeLog: PR fortran/100950 * simplify.c (substring_has_constant_len): New. (gfc_simplify_len): Handle case of substrings with constant bounds. gcc/testsuite/ChangeLog: PR fortran/100950 * gfortran.dg/pr100950.f90: New test. --- gcc/fortran/simplify.c | 75 +++++++++++++++++++++++++++++++++- gcc/testsuite/gfortran.dg/pr100950.f90 | 53 ++++++++++++++++++++++++ 2 files changed, 127 insertions(+), 1 deletion(-) diff --git a/gcc/fortran/simplify.c b/gcc/fortran/simplify.c index c27b47aa98f..492867e12cb 100644 --- a/gcc/fortran/simplify.c +++ b/gcc/fortran/simplify.c @@ -4512,6 +4512,78 @@ gfc_simplify_leadz (gfc_expr *e) } +/* Check for constant length of a substring. */ + +static bool +substring_has_constant_len (gfc_expr *e) +{ + gfc_ref *ref; + HOST_WIDE_INT istart, iend, length; + bool equal_length = false; + + if (e->ts.type != BT_CHARACTER) + return false; + + for (ref = e->ref; ref; ref = ref->next) + if (ref->type != REF_COMPONENT && ref->type != REF_ARRAY) + break; + + if (!ref + || ref->type != REF_SUBSTRING + || !ref->u.ss.start + || ref->u.ss.start->expr_type != EXPR_CONSTANT + || !ref->u.ss.end + || ref->u.ss.end->expr_type != EXPR_CONSTANT + || !ref->u.ss.length) + return false; + + /* For non-deferred strings the given length shall be constant. */ + if (!e->ts.deferred + && (!ref->u.ss.length->length + || ref->u.ss.length->length->expr_type != EXPR_CONSTANT)) + return false; + + /* Basic checks on substring starting and ending indices. */ + if (!gfc_resolve_substring (ref, &equal_length)) + return false; + + istart = gfc_mpz_get_hwi (ref->u.ss.start->value.integer); + iend = gfc_mpz_get_hwi (ref->u.ss.end->value.integer); + + if (istart <= iend) + { + if (istart < 1) + { + gfc_error ("Substring start index (" HOST_WIDE_INT_PRINT_DEC + ") at %L below 1", + istart, &ref->u.ss.start->where); + return false; + } + + /* For deferred strings use end index as proxy for length. */ + if (e->ts.deferred) + length = iend; + else + length = gfc_mpz_get_hwi (ref->u.ss.length->length->value.integer); + if (iend > length) + { + gfc_error ("Substring end index (" HOST_WIDE_INT_PRINT_DEC + ") at %L exceeds string length", + iend, &ref->u.ss.end->where); + return false; + } + length = iend - istart + 1; + } + else + length = 0; + + /* Fix substring length. */ + e->value.character.length = length; + + return true; +} + + gfc_expr * gfc_simplify_len (gfc_expr *e, gfc_expr *kind) { @@ -4521,7 +4593,8 @@ gfc_simplify_len (gfc_expr *e, gfc_expr *kind) if (k == -1) return &gfc_bad_expr; - if (e->expr_type == EXPR_CONSTANT) + if (e->expr_type == EXPR_CONSTANT + || substring_has_constant_len (e)) { result = gfc_get_constant_expr (BT_INTEGER, k, &e->where); mpz_set_si (result->value.integer, e->value.character.length); diff --git a/gcc/testsuite/gfortran.dg/pr100950.f90 b/gcc/testsuite/gfortran.dg/pr100950.f90 new file mode 100644 index 00000000000..cb9d126bc18 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr100950.f90 @@ -0,0 +1,53 @@ +! { dg-do run } +! { dg-additional-options "-fdump-tree-original" } +! PR fortran/100950 - ICE in output_constructor_regular_field, at varasm.c:5514 + +program p + character(8), parameter :: u = "123" + character(8) :: x = "", s + character(2) :: w(2) = [character(len(x(3:4))) :: 'a','b' ] + character(*), parameter :: y(*) = [character(len(u(3:4))) :: 'a','b' ] + character(*), parameter :: z(*) = [character(len(x(3:4))) :: 'a','b' ] + character(*), parameter :: t(*) = [character(len(x( :2))) :: 'a','b' ] + character(*), parameter :: v(*) = [character(len(x(7: ))) :: 'a','b' ] + type t_ + character(len=5) :: s + character(len=8) :: t(4) + character(len=8), pointer :: u(:) + character(len=:), allocatable :: str + character(len=:), allocatable :: str2(:) + end type t_ + type(t_) :: q, r(1) + integer, parameter :: lq = len (q%s(3:4)), lr = len (r%s(3:4)) + integer, parameter :: l1 = len (q %t(1)(3:4)) + integer, parameter :: l2 = len (q %t(:)(3:4)) + integer, parameter :: l3 = len (q %str (3:4)) + integer, parameter :: l4 = len (r(:)%t(1)(3:4)) + integer, parameter :: l5 = len (r(1)%t(:)(3:4)) + integer, parameter :: l6 = len (r(1)%str (3:4)) + integer, parameter :: l7 = len (r(1)%str2(1)(3:4)) + integer, parameter :: l8 = len (r(1)%str2(:)(3:4)) + + if (len (y) /= 2) stop 1 + if (len (z) /= 2) stop 2 + if (any (w /= y)) stop 3 + if (len ([character(len(u(3:4))) :: 'a','b' ]) /= 2) stop 4 + if (len ([character(len(x(3:4))) :: 'a','b' ]) /= 2) stop 5 + if (any ([character(len(x(3:4))) :: 'a','b' ] /= y)) stop 6 + write(s,*) [character(len(x(3:4))) :: 'a','b' ] + if (s /= " a b ") stop 7 + if (len (t) /= 2) stop 8 + if (len (v) /= 2) stop 9 + if (lq /= 2 .or. lr /= 2) stop 10 + if (l1 /= 2 .or. l2 /= 2 .or. l4 /= 2 .or. l5 /= 2) stop 11 + if (l3 /= 2 .or. l6 /= 2 .or. l7 /= 2 .or. l8 /= 2) stop 12 + + block + integer, parameter :: l9 = len (r(1)%u(:)(3:4)) + if (l9 /= 2) stop 13 + end block +end + +! { dg-final { scan-tree-dump-times "_gfortran_stop_numeric" 2 "original" } } +! { dg-final { scan-tree-dump "_gfortran_stop_numeric \$3, 0\$;" "original" } } +! { dg-final { scan-tree-dump "_gfortran_stop_numeric \$7, 0\$;" "original" } } </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_apm/llvm-master-aarch64-spec2k6-Oz - Build # 7 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *linux* in CI configuration tcwg_bmk_llvm_apm/llvm-master-aarch64-spec2k6-Oz. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_apm/llvm-master-aarch64-spec2k6-Oz Culprit: <cut> commit 9ecccaf9771d3f3bb68ef69d34965b1aad874bd6 Merge: 1e28eed17697 12aca1ce9ee3 Author: Rob Clark <robdclark(a)chromium.org> Date: Wed Apr 7 11:04:47 2021 -0700 Merge tag 'drm-msm-fixes-2021-04-02' into msm-next Pull in fixes from previous cycle </cut> Results regressed to (for first_bad == 9ecccaf9771d3f3bb68ef69d34965b1aad874bd6) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -Oz artifacts/build-9ecccaf9771d3f3bb68ef69d34965b1aad874bd6/results_id: 1 # 482.sphinx3,[.] OUTLINED_FUNCTION_4 regressed by 175 from (for last_good == 12aca1ce9ee33af3751aec5e55a5900747cbdd4b) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -Oz artifacts/build-12aca1ce9ee33af3751aec5e55a5900747cbdd4b/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Results ID of last_good: apm_64/tcwg_bmk_llvm_apm/bisect-llvm-master-aarch64-spec2k6-Oz/3996 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Results ID of first_bad: apm_64/tcwg_bmk_llvm_apm/bisect-llvm-master-aarch64-spec2k6-Oz/3987 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Configuration details: Reproduce builds: <cut> mkdir investigate-linux-9ecccaf9771d3f3bb68ef69d34965b1aad874bd6 cd investigate-linux-9ecccaf9771d3f3bb68ef69d34965b1aad874bd6 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /linux/ ./ ./bisect/baseline/ cd linux # Reproduce first_bad build git checkout --detach 9ecccaf9771d3f3bb68ef69d34965b1aad874bd6 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 12aca1ce9ee33af3751aec5e55a5900747cbdd4b ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Full commit (up to 1000 lines): <cut> commit 9ecccaf9771d3f3bb68ef69d34965b1aad874bd6 Merge: 1e28eed17697 12aca1ce9ee3 Author: Rob Clark <robdclark(a)chromium.org> Date: Wed Apr 7 11:04:47 2021 -0700 Merge tag 'drm-msm-fixes-2021-04-02' into msm-next Pull in fixes from previous cycle drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 +- drivers/gpu/drm/msm/adreno/a5xx_power.c | 2 +- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 2 +- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 108 ++++++++++++++++++++--------- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c | 4 +- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 12 ++-- drivers/gpu/drm/msm/dp/dp_aux.c | 7 ++ drivers/gpu/drm/msm/dsi/pll/dsi_pll.c | 2 +- drivers/gpu/drm/msm/dsi/pll/dsi_pll.h | 6 +- drivers/gpu/drm/msm/dsi/pll/dsi_pll_7nm.c | 11 +-- drivers/gpu/drm/msm/msm_atomic.c | 7 +- drivers/gpu/drm/msm/msm_drv.c | 13 ++++ drivers/gpu/drm/msm/msm_fence.c | 2 +- drivers/gpu/drm/msm/msm_kms.h | 8 +-- 14 files changed, 128 insertions(+), 60 deletions(-) </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tx1/llvm-release-aarch64-spec2k6-O3_LTO - Build # 6 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tx1/llvm-release-aarch64-spec2k6-O3_LTO. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tx1/llvm-release-aarch64-spec2k6-O3_LTO Culprit: <cut> commit cedfa38fc46d7531c44ea230b767d8286767f350 Author: Jin Lin <jinl(a)uber.com> Date: Fri Apr 23 22:37:08 2021 -0700 Preserve the lexical order for global variables during llvm-link merge The order of global variables is generated in the order of recursively materializing variables if the global variable has the attribute of hasLocalLinkage or hasLinkOnceLinkage during the module merging. In practice, it is often the exact reverse of source order. This new order may cause performance regression. The change is to preserve the original lexical order for global variables. Reviewed By: jdoerfert, dexonsmith Differential Revision: https://reviews.llvm.org/D94202 </cut> Results regressed to (for first_bad == cedfa38fc46d7531c44ea230b767d8286767f350) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_LTO artifacts/build-cedfa38fc46d7531c44ea230b767d8286767f350/results_id: 1 # 458.sjeng,sjeng_base.default regressed by 103 from (for last_good == 9579af2bd7f39b2118039b66b1a762cf05e7b102) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_LTO artifacts/build-9579af2bd7f39b2118039b66b1a762cf05e7b102/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Results ID of last_good: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-release-aarch64-spec2k6-O3_LTO/3990 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Results ID of first_bad: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-release-aarch64-spec2k6-O3_LTO/3980 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-cedfa38fc46d7531c44ea230b767d8286767f350 cd investigate-llvm-cedfa38fc46d7531c44ea230b767d8286767f350 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach cedfa38fc46d7531c44ea230b767d8286767f350 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 9579af2bd7f39b2118039b66b1a762cf05e7b102 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Full commit (up to 1000 lines): <cut> commit cedfa38fc46d7531c44ea230b767d8286767f350 Author: Jin Lin <jinl(a)uber.com> Date: Fri Apr 23 22:37:08 2021 -0700 Preserve the lexical order for global variables during llvm-link merge The order of global variables is generated in the order of recursively materializing variables if the global variable has the attribute of hasLocalLinkage or hasLinkOnceLinkage during the module merging. In practice, it is often the exact reverse of source order. This new order may cause performance regression. The change is to preserve the original lexical order for global variables. Reviewed By: jdoerfert, dexonsmith Differential Revision: https://reviews.llvm.org/D94202 --- llvm/lib/Linker/IRMover.cpp | 14 ++++++++++++++ llvm/test/Linker/Inputs/globalorder-2.ll | 14 ++++++++++++++ llvm/test/Linker/comdat.ll | 2 +- llvm/test/Linker/comdat14.ll | 2 +- llvm/test/Linker/ctors.ll | 2 +- llvm/test/Linker/ctors2.ll | 2 +- llvm/test/Linker/ctors3.ll | 2 +- llvm/test/Linker/globalorder.ll | 27 +++++++++++++++++++++++++++ llvm/test/Linker/link-flags.ll | 2 +- llvm/test/Linker/metadata-attach.ll | 18 +++++++++--------- llvm/test/Linker/testlink.ll | 2 +- llvm/test/ThinLTO/X86/import-constant.ll | 8 ++++---- llvm/test/ThinLTO/X86/index-const-prop.ll | 6 +++--- llvm/test/ThinLTO/X86/index-const-prop2.ll | 6 +++--- llvm/test/ThinLTO/X86/writeonly.ll | 8 ++++---- llvm/test/ThinLTO/X86/writeonly2.ll | 4 ++-- 16 files changed, 87 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp index 1433d074595b..9489f9655469 100644 --- a/llvm/lib/Linker/IRMover.cpp +++ b/llvm/lib/Linker/IRMover.cpp @@ -1509,6 +1509,20 @@ Error IRLinker::run() { }); } + // Reorder the globals just added to the destination module to match their + // original order in the source module. + Module::GlobalListType &Globals = DstM.getGlobalList(); + for (GlobalVariable &GV : SrcM->globals()) { + if (GV.hasAppendingLinkage()) + continue; + Value *NewValue = Mapper.mapValue(GV); + if (NewValue) { + auto *NewGV = dyn_cast<GlobalVariable>(NewValue->stripPointerCasts()); + if (NewGV) + Globals.splice(Globals.end(), Globals, NewGV->getIterator()); + } + } + // Merge the module flags into the DstM module. return linkModuleFlagsMetadata(); } diff --git a/llvm/test/Linker/Inputs/globalorder-2.ll b/llvm/test/Linker/Inputs/globalorder-2.ll new file mode 100644 index 000000000000..a984ebf78a6a --- /dev/null +++ b/llvm/test/Linker/Inputs/globalorder-2.ll @@ -0,0 +1,14 @@ +@var5 = internal global i32 0, align 4 +@var6 = internal global i32 0, align 4 +@var7 = global i32* @var5, align 4 +@var8 = global i32* @var6, align 4 + +define i32 @foo2() { +entry: + %0 = load i32*, i32** @var7, align 4 + %1 = load i32, i32* %0, align 4 + %2 = load i32*, i32** @var8, align 4 + %3 = load i32, i32* %2, align 4 + %add = add nsw i32 %3, %1 + ret i32 %add +} diff --git a/llvm/test/Linker/comdat.ll b/llvm/test/Linker/comdat.ll index 2a2ec3bbb6b2..e3bb1ea39260 100644 --- a/llvm/test/Linker/comdat.ll +++ b/llvm/test/Linker/comdat.ll @@ -23,9 +23,9 @@ $any = comdat any ; CHECK: $foo = comdat largest ; CHECK: $any = comdat any +; CHECK: @foo = global i64 43, comdat{{$}} ; CHECK: @qux = global i64 12, comdat{{$}} ; CHECK: @any = global i64 6, comdat{{$}} -; CHECK: @foo = global i64 43, comdat{{$}} ; CHECK-NOT: @in_unselected_group = global i32 13, comdat $qux ; CHECK: define i32 @baz() comdat($qux) diff --git a/llvm/test/Linker/comdat14.ll b/llvm/test/Linker/comdat14.ll index 9c6eb7c4cc1d..1a1ba47aa017 100644 --- a/llvm/test/Linker/comdat14.ll +++ b/llvm/test/Linker/comdat14.ll @@ -5,5 +5,5 @@ $c = comdat any @v = global i32 0, comdat ($c) ; CHECK: @v = global i32 0, comdat($c) -; CHECK: @v2 = external dllexport global i32 ; CHECK: @v3 = external global i32 +; CHECK: @v2 = external dllexport global i32 diff --git a/llvm/test/Linker/ctors.ll b/llvm/test/Linker/ctors.ll index 37dba23d4c91..f68ca3e54762 100644 --- a/llvm/test/Linker/ctors.ll +++ b/llvm/test/Linker/ctors.ll @@ -6,12 +6,12 @@ ; Test the bitcode writer too. It used to crash. ; RUN: llvm-link %s %p/Inputs/ctors.ll -o %t.bc +; ALL: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @f, i8* @v }] @v = weak global i8 0 ; CHECK1: @v = weak global i8 0 ; CHECK2: @v = weak global i8 1 @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @f, i8* @v }] -; ALL: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @f, i8* @v }] define weak void @f() { ret void diff --git a/llvm/test/Linker/ctors2.ll b/llvm/test/Linker/ctors2.ll index 9b7a70eb7cd1..c02973faf4d0 100644 --- a/llvm/test/Linker/ctors2.ll +++ b/llvm/test/Linker/ctors2.ll @@ -3,5 +3,5 @@ $foo = comdat any @foo = global i8 0, comdat -; CHECK: @foo = global i8 0, comdat ; CHECK: @llvm.global_ctors = appending global [0 x { i32, void ()*, i8* }] zeroinitializer +; CHECK: @foo = global i8 0, comdat diff --git a/llvm/test/Linker/ctors3.ll b/llvm/test/Linker/ctors3.ll index e62b92dca0b4..d522df58e891 100644 --- a/llvm/test/Linker/ctors3.ll +++ b/llvm/test/Linker/ctors3.ll @@ -4,5 +4,5 @@ $foo = comdat any %t = type { i8 } @foo = global %t zeroinitializer, comdat -; CHECK: @foo = global %t zeroinitializer, comdat ; CHECK: @llvm.global_ctors = appending global [0 x { i32, void ()*, i8* }] zeroinitializer +; CHECK: @foo = global %t zeroinitializer, comdat diff --git a/llvm/test/Linker/globalorder.ll b/llvm/test/Linker/globalorder.ll new file mode 100644 index 000000000000..caab97a6ba86 --- /dev/null +++ b/llvm/test/Linker/globalorder.ll @@ -0,0 +1,27 @@ +; Test the order of global variables during llvm-link + +; RUN: llvm-link %s %S/Inputs/globalorder-2.ll -o %t.bc +; RUN: llvm-dis -o - %t.bc | FileCheck %s + +@var1 = internal global i32 0, align 4 +@var2 = internal global i32 0, align 4 +@var3 = global i32* @var1, align 4 +@var4 = global i32* @var2, align 4 + +define i32 @foo() { +entry: + %0 = load i32*, i32** @var3, align 4 + %1 = load i32, i32* %0, align 4 + %2 = load i32*, i32** @var4, align 4 + %3 = load i32, i32* %2, align 4 + %add = add nsw i32 %3, %1 + ret i32 %add +} +; CHECK: @var1 = +; CHECK-NEXT: @var2 = +; CHECK-NEXT: @var3 = +; CHECK-NEXT: @var4 = +; CHECK-NEXT: @var5 = +; CHECK-NEXT: @var6 = +; CHECK-NEXT: @var7 = +; CHECK-NEXT: @var8 = diff --git a/llvm/test/Linker/link-flags.ll b/llvm/test/Linker/link-flags.ll index 1a57e8aa4d28..4e8eaa62dd78 100644 --- a/llvm/test/Linker/link-flags.ll +++ b/llvm/test/Linker/link-flags.ll @@ -9,8 +9,8 @@ CI-LABEL: @X = internal global i32 5 CU-LABEL:@U = global i32 6 CI-LABEL:@U = internal global i32 6 CN-NOT:@U -DI-LABEL: @Y = global i8 42 DI-LABEL: @llvm.used = appending global [2 x i8*] [i8* @Y, i8* bitcast (i64 ()* @foo to i8*)], section "llvm.metadata" +DI-LABEL: @Y = global i8 42 B-LABEL: define void @bar() { diff --git a/llvm/test/Linker/metadata-attach.ll b/llvm/test/Linker/metadata-attach.ll index 368c72a9c5f8..6b6c8dd9ed35 100644 --- a/llvm/test/Linker/metadata-attach.ll +++ b/llvm/test/Linker/metadata-attach.ll @@ -6,17 +6,17 @@ ; CHECK-LINKED1: @g1 = global i32 0, !attach !0{{$}} @g1 = global i32 0, !attach !0 -; CHECK: @g3 = weak global i32 1, !attach !0{{$}} ; CHECK: @g2 = external global i32, !attach !0{{$}} +; CHECK: @g3 = weak global i32 1, !attach !0{{$}} ; CHECK-LINKED1: @g2 = global i32 1, !attach !1{{$}} @g2 = external global i32, !attach !0 ; CHECK-LINKED1: @g3 = global i32 2, !attach !1{{$}} @g3 = weak global i32 1, !attach !0 -; CHECK-LINKED2: @g2 = global i32 1, !attach !0{{$}} -; CHECK-LINKED2: @g3 = global i32 2, !attach !0{{$}} -; CHECK-LINKED2: @g1 = global i32 0, !attach !1{{$}} +; CHECK-LINKED2: @g1 = global i32 0, !attach !0{{$}} +; CHECK-LINKED2: @g2 = global i32 1, !attach !1{{$}} +; CHECK-LINKED2: @g3 = global i32 2, !attach !1{{$}} ; CHECK: define void @f1() !attach !0 { ; CHECK-LINKED1: define void @f1() !attach !0 { @@ -36,14 +36,14 @@ define weak void @f3() !attach !0 { ret void } -; CHECK-LINKED2: define void @f2() !attach !0 { -; CHECK-LINKED2: define void @f3() !attach !0 { -; CHECK-LINKED2: define void @f1() !attach !1 { +; CHECK-LINKED2: define void @f2() !attach !1 { +; CHECK-LINKED2: define void @f3() !attach !1 { +; CHECK-LINKED2: define void @f1() !attach !0 { ; CHECK-LINKED1: !0 = !{i32 0} ; CHECK-LINKED1: !1 = !{i32 1} -; CHECK-LINKED2: !0 = !{i32 1} -; CHECK-LINKED2: !1 = !{i32 0} +; CHECK-LINKED2: !0 = !{i32 0} +; CHECK-LINKED2: !1 = !{i32 1} !0 = !{i32 0} diff --git a/llvm/test/Linker/testlink.ll b/llvm/test/Linker/testlink.ll index 6a316a3bf846..69870b50413c 100644 --- a/llvm/test/Linker/testlink.ll +++ b/llvm/test/Linker/testlink.ll @@ -1,7 +1,7 @@ ; RUN: llvm-link %s %S/Inputs/testlink.ll -S | FileCheck %s -; CHECK: %Ty2 = type { %Ty1* } ; CHECK: %Ty1 = type { %Ty2* } +; CHECK: %Ty2 = type { %Ty1* } %Ty1 = type opaque %Ty2 = type { %Ty1* } diff --git a/llvm/test/ThinLTO/X86/import-constant.ll b/llvm/test/ThinLTO/X86/import-constant.ll index 1bc2a1c2f7a4..e5254948f591 100644 --- a/llvm/test/ThinLTO/X86/import-constant.ll +++ b/llvm/test/ThinLTO/X86/import-constant.ll @@ -28,9 +28,9 @@ ; PROMOTE: @_ZL3Obj.llvm.{{.*}} = hidden constant %struct.S { i32 4, i32 8, i32* @val } ; @outer is a write-only variable, so it's been converted to zeroinitializer. -; IMPORT: @outer = internal local_unnamed_addr global %struct.Q zeroinitializer +; IMPORT: @val = available_externally global i32 42 ; IMPORT-NEXT: @_ZL3Obj.llvm.{{.*}} = available_externally hidden constant %struct.S { i32 4, i32 8, i32* @val } -; IMPORT-NEXT: @val = available_externally global i32 42 +; IMPORT-NEXT: @outer = internal local_unnamed_addr global %struct.Q zeroinitializer ; OPT: @outer = internal unnamed_addr global %struct.Q zeroinitializer @@ -39,8 +39,8 @@ ; OPT-NEXT: store %struct.S* null, %struct.S** getelementptr inbounds (%struct.Q, %struct.Q* @outer, i64 0, i32 0) ; OPT-NEXT: ret i32 12 -; NOREFS: @outer = internal local_unnamed_addr global %struct.Q zeroinitializer -; NOREFS-NEXT: @_ZL3Obj.llvm.{{.*}} = external hidden constant %struct.S +; NOREFS: @_ZL3Obj.llvm.{{.*}} = external hidden constant %struct.S +; NOREFS-NEXT: @outer = internal local_unnamed_addr global %struct.Q zeroinitializer target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/ThinLTO/X86/index-const-prop.ll b/llvm/test/ThinLTO/X86/index-const-prop.ll index 9718aec8a944..b8028d65fa58 100644 --- a/llvm/test/ThinLTO/X86/index-const-prop.ll +++ b/llvm/test/ThinLTO/X86/index-const-prop.ll @@ -19,14 +19,14 @@ ; RUN: llvm-lto -thinlto-action=import -exported-symbol main -exported-symbol gBar %t1.bc -thinlto-index=%t3.index.bc -o %t1.imported2.bc ; RUN: llvm-dis %t1.imported2.bc -o - | FileCheck %s --check-prefix=IMPORT2 -; IMPORT: @gFoo.llvm.0 = internal unnamed_addr global i32 1, align 4, !dbg !0 -; IMPORT-NEXT: @gBar = internal local_unnamed_addr global i32 2, align 4, !dbg !5 +; IMPORT: @gBar = internal local_unnamed_addr global i32 2, align 4, !dbg !0 +; IMPORT-NEXT: @gFoo.llvm.0 = internal unnamed_addr global i32 1, align 4, !dbg !5 ; IMPORT: !DICompileUnit({{.*}}) ; OPTIMIZE: define i32 @main ; OPTIMIZE-NEXT: ret i32 3 -; IMPORT2: @gBar = available_externally local_unnamed_addr global i32 2, align 4, !dbg !5 +; IMPORT2: @gBar = available_externally local_unnamed_addr global i32 2, align 4, !dbg !0 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-linux-gnu" diff --git a/llvm/test/ThinLTO/X86/index-const-prop2.ll b/llvm/test/ThinLTO/X86/index-const-prop2.ll index 5bf40fc688d5..68fe058a8637 100644 --- a/llvm/test/ThinLTO/X86/index-const-prop2.ll +++ b/llvm/test/ThinLTO/X86/index-const-prop2.ll @@ -57,13 +57,13 @@ ; with corresponsing stores ; RUN: llvm-dis %t5.2.5.precodegen.bc -o - | FileCheck %s --check-prefix=CODEGEN2-SRC -; IMPORT: @gFoo.llvm.0 = internal unnamed_addr global i32 1, align 4 -; IMPORT-NEXT: @gBar = internal local_unnamed_addr global i32 2, align 4 +; IMPORT: @gBar = internal local_unnamed_addr global i32 2, align 4 +; IMPORT-NEXT: @gFoo.llvm.0 = internal unnamed_addr global i32 1, align 4 ; IMPORT: !DICompileUnit({{.*}}) ; Write only variables are imported with a zero initializer. -; IMPORT-WRITEONLY: @gFoo.llvm.0 = internal unnamed_addr global i32 0 ; IMPORT-WRITEONLY: @gBar = internal local_unnamed_addr global i32 0 +; IMPORT-WRITEONLY: @gFoo.llvm.0 = internal unnamed_addr global i32 0 ; CODEGEN: i32 @main() ; CODEGEN-NEXT: ret i32 3 diff --git a/llvm/test/ThinLTO/X86/writeonly.ll b/llvm/test/ThinLTO/X86/writeonly.ll index 7616f192fbfc..65c93a79afa9 100644 --- a/llvm/test/ThinLTO/X86/writeonly.ll +++ b/llvm/test/ThinLTO/X86/writeonly.ll @@ -11,8 +11,8 @@ ; RUN: llvm-dis %t1.imported.bc -o - | FileCheck %s --check-prefix=IMPORT ; RUN: llvm-lto -thinlto-action=optimize %t1.imported.bc -o - | llvm-dis - -o - | FileCheck %s --check-prefix=OPTIMIZE -; IMPORT: @gFoo.llvm.0 = internal unnamed_addr global i32 0, align 4, !dbg !0 -; IMPORT-NEXT: @gBar = internal local_unnamed_addr global i32 0, align 4, !dbg !5 +; IMPORT: @gBar = internal local_unnamed_addr global i32 0, align 4, !dbg !0 +; IMPORT-NEXT: @gFoo.llvm.0 = internal unnamed_addr global i32 0, align 4, !dbg !5 ; IMPORT: !DICompileUnit({{.*}}) ; STATS: 2 module-summary-index - Number of live global variables marked write only @@ -29,8 +29,8 @@ ; RUN: llvm-lto -propagate-attrs=false -thinlto-action=import -exported-symbol=main %t1.bc -thinlto-index=%t3.index.bc -o %t1.imported.bc -stats 2>&1 | FileCheck %s --check-prefix=STATS-NOPROP ; RUN: llvm-dis %t1.imported.bc -o - | FileCheck %s --check-prefix=IMPORT-NOPROP ; STATS-NOPROP-NOT: Number of live global variables marked write only -; IMPORT-NOPROP: @gFoo.llvm.0 = available_externally -; IMPORT-NOPROP-NEXT: @gBar = available_externally +; IMPORT-NOPROP: @gBar = available_externally +; IMPORT-NOPROP-NEXT: @gFoo.llvm.0 = available_externally target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-linux-gnu" diff --git a/llvm/test/ThinLTO/X86/writeonly2.ll b/llvm/test/ThinLTO/X86/writeonly2.ll index 2648727f0997..f31f9b35a253 100644 --- a/llvm/test/ThinLTO/X86/writeonly2.ll +++ b/llvm/test/ThinLTO/X86/writeonly2.ll @@ -19,8 +19,8 @@ ; with corresponsing stores ; RUN: llvm-dis %t3.2.5.precodegen.bc -o - | FileCheck %s --check-prefix=CODEGEN-SRC -; IMPORT: @gFoo.llvm.0 = internal unnamed_addr global i32 0, align 4 -; IMPORT-NEXT: @gBar = internal local_unnamed_addr global i32 0, align 4 +; IMPORT: @gBar = internal local_unnamed_addr global i32 0, align 4 +; IMPORT-NEXT: @gFoo.llvm.0 = internal unnamed_addr global i32 0, align 4 ; IMPORT: !DICompileUnit({{.*}}) ; CODEGEN-NOT: gFoo </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tx1/gnu-release-aarch64-spec2k6-O2 - Build # 28 - Fixed!

by ci_notify＠linaro.org

Successfully identified regression in *gcc* in CI configuration tcwg_bmk_gnu_tx1/gnu-release-aarch64-spec2k6-O2. So far, this commit has regressed CI configurations: - tcwg_bmk_gnu_tx1/gnu-release-aarch64-spec2k6-O2 Culprit: <cut> commit ea8dd3b6cea2a4d4dab7e2997b88a170f8093ce6 Author: Jan Hubicka <jh(a)suse.cz> Date: Tue Nov 19 19:56:26 2019 +0100 Avoid redundant computations in edge_badness. * ipa-inline.c (inlining_speedup): New function. (edge_badness): Use it. From-SVN: r278459 </cut> Results regressed to (for first_bad == ea8dd3b6cea2a4d4dab7e2997b88a170f8093ce6) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # true: 0 # benchmark -- -O2 artifacts/build-ea8dd3b6cea2a4d4dab7e2997b88a170f8093ce6/results_id: 1 # 447.dealII,[.] _ZNK12SparseMatrixIdE5vmultI6VectorIdES3_EEvRT regressed by 116 from (for last_good == 4aa5fd8aca1140adf0917dc53397efddc7fd4c11) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # true: 0 # benchmark -- -O2 artifacts/build-4aa5fd8aca1140adf0917dc53397efddc7fd4c11/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… Results ID of last_good: tx1_64/tcwg_bmk_gnu_tx1/bisect-gnu-release-aarch64-spec2k6-O2/3988 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… Results ID of first_bad: tx1_64/tcwg_bmk_gnu_tx1/bisect-gnu-release-aarch64-spec2k6-O2/3979 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… Configuration details: Reproduce builds: <cut> mkdir investigate-gcc-ea8dd3b6cea2a4d4dab7e2997b88a170f8093ce6 cd investigate-gcc-ea8dd3b6cea2a4d4dab7e2997b88a170f8093ce6 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /gcc/ ./ ./bisect/baseline/ cd gcc # Reproduce first_bad build git checkout --detach ea8dd3b6cea2a4d4dab7e2997b88a170f8093ce6 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 4aa5fd8aca1140adf0917dc53397efddc7fd4c11 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… Full commit (up to 1000 lines): <cut> commit ea8dd3b6cea2a4d4dab7e2997b88a170f8093ce6 Author: Jan Hubicka <jh(a)suse.cz> Date: Tue Nov 19 19:56:26 2019 +0100 Avoid redundant computations in edge_badness. * ipa-inline.c (inlining_speedup): New function. (edge_badness): Use it. From-SVN: r278459 --- gcc/ChangeLog | 5 +++++ gcc/ipa-inline.c | 37 ++++++++++++++++++++++++++++++------- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3516235f606..710f8dab674 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2019-11-18 Jan Hubicka <jh(a)suse.cz> + + * ipa-inline.c (inlining_speedup): New function. + (edge_badness): Use it. + 2019-11-19 Zoran Jovanovic <zoran.jovanovic(a)mips.com> Dragan Mladjenovic <dmladjenovic(a)wavecomp.com> diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c index 1f77ba25ce0..becea8a3e8e 100644 --- a/gcc/ipa-inline.c +++ b/gcc/ipa-inline.c @@ -768,6 +768,33 @@ compute_inlined_call_time (struct cgraph_edge *edge, return time; } +/* Determine time saved by inlininig EDGE of frequency FREQ + where callee's runtime w/o inlineing is UNINLINED_TYPE + and with inlined is INLINED_TYPE. */ + +inline sreal +inlining_speedup (struct cgraph_edge *edge, + sreal freq, + sreal uninlined_time, + sreal inlined_time) +{ + sreal speedup = uninlined_time - inlined_time; + /* Handling of call_time should match one in ipa-inline-fnsummary.c + (estimate_edge_size_and_time). */ + sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time; + + if (freq > 0) + { + speedup = (speedup + call_time); + if (freq != 1) + speedup = speedup * freq; + } + else if (freq == 0) + speedup = speedup >> 11; + gcc_checking_assert (speedup >= 0); + return speedup; +} + /* Return true if the speedup for inlining E is bigger than PARAM_MAX_INLINE_MIN_SPEEDUP. */ @@ -1149,10 +1176,8 @@ edge_badness (struct cgraph_edge *edge, bool dump) sreal numerator, denominator; int overall_growth; sreal freq = edge->sreal_frequency (); - sreal inlined_time = compute_inlined_call_time (edge, edge_time, freq); - numerator = (compute_uninlined_call_time (edge, unspec_edge_time, freq) - - inlined_time); + numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time); if (numerator <= 0) numerator = ((sreal) 1 >> 8); if (caller->count.ipa ().nonzero_p ()) @@ -1235,16 +1260,14 @@ edge_badness (struct cgraph_edge *edge, bool dump) fprintf (dump_file, " %f: guessed profile. frequency %f, count %" PRId64 " caller count %" PRId64 - " time w/o inlining %f, time with inlining %f" + " time saved %f" " overall growth %i (current) %i (original)" " %i (compensated)\n", badness.to_double (), freq.to_double (), edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1, caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1, - compute_uninlined_call_time (edge, - unspec_edge_time, freq).to_double (), - inlined_time.to_double (), + inlining_speedup (edge, freq, unspec_edge_time, edge_time).to_double (), estimate_growth (callee), callee_info->growth, overall_growth); } </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tk1/llvm-release-arm-spec2k6-O2_LTO - Build # 10 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O2_LTO. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O2_LTO Culprit: <cut> commit 34eb0adaa9cd76f5bb0549f5a32d6ef452fe977e Author: peter klausler <pklausler(a)nvidia.com> Date: Tue Feb 2 10:51:14 2021 -0800 [flang] Add -fsyntax-only to f18; retain -fparse-only synonym Now that semantics is working, the standard -fsyntax-only option of GNU and Clang should be used as the name of the option that causes f18 to just run the front-end. Support both options in f18, silently accepting the old option as a synonym for the new one (as preferred by the code owner), and replace all instances of the old -fparse-only option with -fsyntax-only throughout the source base. Differential Revision: https://reviews.llvm.org/D95887 </cut> Results regressed to (for first_bad == 34eb0adaa9cd76f5bb0549f5a32d6ef452fe977e) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2_LTO_marm artifacts/build-34eb0adaa9cd76f5bb0549f5a32d6ef452fe977e/results_id: 1 # 445.gobmk,gobmk_base.default regressed by 103 from (for last_good == 81b69879c946533c71cc484bd8d9202bf1e34bfe) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2_LTO_marm artifacts/build-81b69879c946533c71cc484bd8d9202bf1e34bfe/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of last_good: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O2_LTO/3985 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of first_bad: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O2_LTO/3981 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-34eb0adaa9cd76f5bb0549f5a32d6ef452fe977e cd investigate-llvm-34eb0adaa9cd76f5bb0549f5a32d6ef452fe977e git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach 34eb0adaa9cd76f5bb0549f5a32d6ef452fe977e ../artifacts/test.sh # Reproduce last_good build git checkout --detach 81b69879c946533c71cc484bd8d9202bf1e34bfe ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Full commit (up to 1000 lines): <cut> commit 34eb0adaa9cd76f5bb0549f5a32d6ef452fe977e Author: peter klausler <pklausler(a)nvidia.com> Date: Tue Feb 2 10:51:14 2021 -0800 [flang] Add -fsyntax-only to f18; retain -fparse-only synonym Now that semantics is working, the standard -fsyntax-only option of GNU and Clang should be used as the name of the option that causes f18 to just run the front-end. Support both options in f18, silently accepting the old option as a synonym for the new one (as preferred by the code owner), and replace all instances of the old -fparse-only option with -fsyntax-only throughout the source base. Differential Revision: https://reviews.llvm.org/D95887 --- flang/docs/ImplementingASemanticCheck.md | 2 +- flang/docs/Overview.md | 4 ++-- flang/test/Evaluate/test_folding.sh | 2 +- flang/test/Flang-Driver/parse-error.f95 | 2 +- flang/test/Flang-Driver/syntax-only.f90 | 2 +- flang/test/Frontend/prescanner-diag.f90 | 2 +- flang/test/Lower/pre-fir-tree01.f90 | 2 +- flang/test/Lower/pre-fir-tree02.f90 | 2 +- flang/test/Lower/pre-fir-tree03.f90 | 2 +- flang/test/Lower/pre-fir-tree04.f90 | 2 +- flang/test/Lower/pre-fir-tree05.f90 | 2 +- flang/test/Semantics/call17.f90 | 2 +- flang/test/Semantics/data05.f90 | 2 +- flang/test/Semantics/data08.f90 | 2 +- flang/test/Semantics/data09.f90 | 2 +- flang/test/Semantics/empty.f90 | 4 ++-- flang/test/Semantics/final02.f90 | 2 +- flang/test/Semantics/getdefinition01.f90 | 8 ++++---- flang/test/Semantics/getdefinition02.f | 6 +++--- flang/test/Semantics/getdefinition03-a.f90 | 4 ++-- flang/test/Semantics/getdefinition04.f90 | 2 +- flang/test/Semantics/getdefinition05.f90 | 4 ++-- flang/test/Semantics/getsymbols01.f90 | 2 +- flang/test/Semantics/getsymbols02.f90 | 6 +++--- flang/test/Semantics/getsymbols03-a.f90 | 2 +- flang/test/Semantics/getsymbols04.f90 | 2 +- flang/test/Semantics/getsymbols05.f90 | 2 +- flang/test/Semantics/missing_newline.f90 | 4 ++-- flang/test/Semantics/mod-file-rewriter.f90 | 8 ++++---- flang/test/Semantics/modifiable01.f90 | 2 +- flang/test/Semantics/offsets01.f90 | 2 +- flang/test/Semantics/offsets02.f90 | 2 +- flang/test/Semantics/offsets03.f90 | 2 +- flang/test/Semantics/oldparam01.f90 | 2 +- flang/test/Semantics/oldparam02.f90 | 2 +- flang/test/Semantics/oldparam03.f90 | 2 +- flang/test/Semantics/resolve100.f90 | 2 +- flang/test/Semantics/rewrite01.f90 | 2 +- flang/test/Semantics/test_errors.sh | 2 +- flang/test/Semantics/test_modfile.sh | 2 +- flang/test/Semantics/typeinfo01.f90 | 2 +- flang/tools/f18-parse-demo/f18-parse-demo.cpp | 10 +++++----- flang/tools/f18/CMakeLists.txt | 2 +- flang/tools/f18/f18.cpp | 10 +++++----- 44 files changed, 67 insertions(+), 67 deletions(-) diff --git a/flang/docs/ImplementingASemanticCheck.md b/flang/docs/ImplementingASemanticCheck.md index 35b107e4988e..4a6fe133f21f 100644 --- a/flang/docs/ImplementingASemanticCheck.md +++ b/flang/docs/ImplementingASemanticCheck.md @@ -67,7 +67,7 @@ of the call to `intentOutFunc()`: I also used this program to produce a parse tree for the program using the command: ```bash - f18 -fdebug-dump-parse-tree -fparse-only testfun.f90 + f18 -fdebug-dump-parse-tree -fsyntax-only testfun.f90 ``` Here's the relevant fragment of the parse tree produced by the compiler: diff --git a/flang/docs/Overview.md b/flang/docs/Overview.md index 987858943845..4ef04f865083 100644 --- a/flang/docs/Overview.md +++ b/flang/docs/Overview.md @@ -46,7 +46,7 @@ See: [Preprocessing.md](Preprocessing.md). **Entry point:** `parser::Parsing::Parse` **Command:** - - `f18 -fdebug-dump-parse-tree -fparse-only src.f90` dumps the parse tree + - `f18 -fdebug-dump-parse-tree -fsyntax-only src.f90` dumps the parse tree - `f18 -funparse src.f90` converts the parse tree to normalized Fortran ## Validate Labels and Canonicalize Do Statements @@ -74,7 +74,7 @@ See: [Preprocessing.md](Preprocessing.md). **Entry points:** `semantics::ResolveNames`, `semantics::RewriteParseTree` -**Command:** `f18 -fdebug-dump-symbols -fparse-only src.f90` dumps the +**Command:** `f18 -fdebug-dump-symbols -fsyntax-only src.f90` dumps the tree of scopes and symbols in each scope ## Check DO CONCURRENT Constraints diff --git a/flang/test/Evaluate/test_folding.sh b/flang/test/Evaluate/test_folding.sh index 81ecbea55274..951ef36ecd2c 100755 --- a/flang/test/Evaluate/test_folding.sh +++ b/flang/test/Evaluate/test_folding.sh @@ -32,7 +32,7 @@ temp=$1 mkdir -p $temp shift -CMD="$* -fdebug-dump-symbols -fparse-only" +CMD="$* -fdebug-dump-symbols -fsyntax-only" # Check if tests should assume folding is using libpgmath if [[ $LIBPGMATH ]]; then diff --git a/flang/test/Flang-Driver/parse-error.f95 b/flang/test/Flang-Driver/parse-error.f95 index 84a63665659d..8266d042df02 100644 --- a/flang/test/Flang-Driver/parse-error.f95 +++ b/flang/test/Flang-Driver/parse-error.f95 @@ -1,5 +1,5 @@ ! RUN: not %flang-new -fc1 -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix=ERROR -! RUN: not %f18 -parse-only %s 2>&1 | FileCheck %s --check-prefix=ERROR +! RUN: not %f18 -syntax-only %s 2>&1 | FileCheck %s --check-prefix=ERROR ! REQUIRES: new-flang-driver diff --git a/flang/test/Flang-Driver/syntax-only.f90 b/flang/test/Flang-Driver/syntax-only.f90 index f04dd713aeab..0eed3cca2fe9 100644 --- a/flang/test/Flang-Driver/syntax-only.f90 +++ b/flang/test/Flang-Driver/syntax-only.f90 @@ -1,5 +1,5 @@ ! RUN: not %flang-new -fc1 -fsyntax-only %s 2>&1 | FileCheck %s -! RUN: not %f18 -fparse-only %s 2>&1 | FileCheck %s +! RUN: not %f18 -fsyntax-only %s 2>&1 | FileCheck %s ! REQUIRES: new-flang-driver diff --git a/flang/test/Frontend/prescanner-diag.f90 b/flang/test/Frontend/prescanner-diag.f90 index 4c7e6e369beb..26536092b41b 100644 --- a/flang/test/Frontend/prescanner-diag.f90 +++ b/flang/test/Frontend/prescanner-diag.f90 @@ -6,7 +6,7 @@ ! RUN: %flang-new -fc1 -E -I %S/Inputs/ %s 2>&1 | FileCheck %s ! Test with -fsyntax-only (i.e. ParseSyntaxOnlyAction, stops after semantic checks) -! RUN: %f18 -fparse-only -I %S/Inputs/ %s 2>&1 | FileCheck %s +! RUN: %f18 -fsyntax-only -I %S/Inputs/ %s 2>&1 | FileCheck %s ! RUN: %flang-new -fsyntax-only -I %S/Inputs/ %s 2>&1 | FileCheck %s ! RUN: %flang-new -fc1 -fsyntax-only -I %S/Inputs/ %s 2>&1 | FileCheck %s diff --git a/flang/test/Lower/pre-fir-tree01.f90 b/flang/test/Lower/pre-fir-tree01.f90 index 6b27add4659f..5e59ff784f97 100644 --- a/flang/test/Lower/pre-fir-tree01.f90 +++ b/flang/test/Lower/pre-fir-tree01.f90 @@ -1,4 +1,4 @@ -! RUN: %f18 -fdebug-pre-fir-tree -fparse-only %s | FileCheck %s +! RUN: %f18 -fdebug-pre-fir-tree -fsyntax-only %s | FileCheck %s ! Test structure of the Pre-FIR tree diff --git a/flang/test/Lower/pre-fir-tree02.f90 b/flang/test/Lower/pre-fir-tree02.f90 index 2d50a9394985..1db49605d98d 100644 --- a/flang/test/Lower/pre-fir-tree02.f90 +++ b/flang/test/Lower/pre-fir-tree02.f90 @@ -1,4 +1,4 @@ -! RUN: %f18 -fdebug-pre-fir-tree -fparse-only %s | FileCheck %s +! RUN: %f18 -fdebug-pre-fir-tree -fsyntax-only %s | FileCheck %s ! Test Pre-FIR Tree captures all the intended nodes from the parse-tree ! Coarray and OpenMP related nodes are tested in other files. diff --git a/flang/test/Lower/pre-fir-tree03.f90 b/flang/test/Lower/pre-fir-tree03.f90 index 1c8651b64f83..efc923a3fe84 100644 --- a/flang/test/Lower/pre-fir-tree03.f90 +++ b/flang/test/Lower/pre-fir-tree03.f90 @@ -1,4 +1,4 @@ -! RUN: %f18 -fdebug-pre-fir-tree -fparse-only -fopenmp %s | FileCheck %s +! RUN: %f18 -fdebug-pre-fir-tree -fsyntax-only -fopenmp %s | FileCheck %s ! Test Pre-FIR Tree captures OpenMP related constructs diff --git a/flang/test/Lower/pre-fir-tree04.f90 b/flang/test/Lower/pre-fir-tree04.f90 index 34212fbb1ff0..3f2beaf5fb47 100644 --- a/flang/test/Lower/pre-fir-tree04.f90 +++ b/flang/test/Lower/pre-fir-tree04.f90 @@ -1,4 +1,4 @@ -! RUN: %f18 -fdebug-pre-fir-tree -fparse-only %s | FileCheck %s +! RUN: %f18 -fdebug-pre-fir-tree -fsyntax-only %s | FileCheck %s ! Test Pre-FIR Tree captures all the coarray related statements diff --git a/flang/test/Lower/pre-fir-tree05.f90 b/flang/test/Lower/pre-fir-tree05.f90 index 98af5c2de944..3acc38b20d35 100644 --- a/flang/test/Lower/pre-fir-tree05.f90 +++ b/flang/test/Lower/pre-fir-tree05.f90 @@ -1,4 +1,4 @@ -! RUN: %f18 -fdebug-pre-fir-tree -fparse-only -fopenacc %s | FileCheck %s +! RUN: %f18 -fdebug-pre-fir-tree -fsyntax-only -fopenacc %s | FileCheck %s ! Test structure of the Pre-FIR tree with OpenACC construct diff --git a/flang/test/Semantics/call17.f90 b/flang/test/Semantics/call17.f90 index 1f4d2c4d9186..ace626037dd8 100644 --- a/flang/test/Semantics/call17.f90 +++ b/flang/test/Semantics/call17.f90 @@ -1,4 +1,4 @@ -! RUN: %f18 -fparse-only $s 2>&1 | FileCheck %s +! RUN: %f18 -fsyntax-only $s 2>&1 | FileCheck %s ! Regression test: don't emit a bogus error about an invalid specification expression ! in the declaration of a binding diff --git a/flang/test/Semantics/data05.f90 b/flang/test/Semantics/data05.f90 index a138b067942e..f3c7aa9be6e3 100644 --- a/flang/test/Semantics/data05.f90 +++ b/flang/test/Semantics/data05.f90 @@ -1,4 +1,4 @@ -!RUN: %f18 -fdebug-dump-symbols -fparse-only %s | FileCheck %s +!RUN: %f18 -fdebug-dump-symbols -fsyntax-only %s | FileCheck %s module m interface integer function ifunc(n) diff --git a/flang/test/Semantics/data08.f90 b/flang/test/Semantics/data08.f90 index 86a87f90163f..4040ac8e148d 100644 --- a/flang/test/Semantics/data08.f90 +++ b/flang/test/Semantics/data08.f90 @@ -1,4 +1,4 @@ -! RUN: %f18 -fdebug-dump-symbols -fparse-only %s 2>&1 | FileCheck %s +! RUN: %f18 -fdebug-dump-symbols -fsyntax-only %s 2>&1 | FileCheck %s ! CHECK: DATA statement value initializes 'jx' of type 'INTEGER(4)' with CHARACTER ! CHECK: DATA statement value initializes 'jy' of type 'INTEGER(4)' with CHARACTER ! CHECK: DATA statement value initializes 'jz' of type 'INTEGER(4)' with CHARACTER diff --git a/flang/test/Semantics/data09.f90 b/flang/test/Semantics/data09.f90 index 1550787b0d4c..4510b830ef7f 100644 --- a/flang/test/Semantics/data09.f90 +++ b/flang/test/Semantics/data09.f90 @@ -1,4 +1,4 @@ -! RUN: %f18 -fparse-only -fdebug-dump-symbols %s 2>&1 | FileCheck %s +! RUN: %f18 -fsyntax-only -fdebug-dump-symbols %s 2>&1 | FileCheck %s ! CHECK: init:[INTEGER(4)::1065353216_4,1073741824_4,1077936128_4,1082130432_4] ! Verify that the closure of EQUIVALENCE'd symbols with any DATA ! initialization produces a combined initializer. diff --git a/flang/test/Semantics/empty.f90 b/flang/test/Semantics/empty.f90 index e47c2e65342c..ff8f64258741 100644 --- a/flang/test/Semantics/empty.f90 +++ b/flang/test/Semantics/empty.f90 @@ -1,4 +1,4 @@ -! RUN: %f18 -fparse-only %s +! RUN: %f18 -fsyntax-only %s ! RUN: rm -rf %t && mkdir %t ! RUN: touch %t/empty.f90 -! RUN: %f18 -fparse-only %t/empty.f90 +! RUN: %f18 -fsyntax-only %t/empty.f90 diff --git a/flang/test/Semantics/final02.f90 b/flang/test/Semantics/final02.f90 index b58f91f3a228..f613a4228481 100644 --- a/flang/test/Semantics/final02.f90 +++ b/flang/test/Semantics/final02.f90 @@ -1,4 +1,4 @@ -!RUN: %f18 -fparse-only %s 2>&1 | FileCheck %s +!RUN: %f18 -fsyntax-only %s 2>&1 | FileCheck %s module m type :: t1 integer :: n diff --git a/flang/test/Semantics/getdefinition01.f90 b/flang/test/Semantics/getdefinition01.f90 index 57141b671754..06f2cc0f8dda 100644 --- a/flang/test/Semantics/getdefinition01.f90 +++ b/flang/test/Semantics/getdefinition01.f90 @@ -16,12 +16,12 @@ contains end module ! RUN and CHECK lines at the bottom as this test is sensitive to line numbers -! RUN: %f18 -fget-definition 6 17 18 -fparse-only %s | FileCheck --check-prefix=CHECK1 %s -! RUN: %f18 -fget-definition 7 20 23 -fparse-only %s | FileCheck --check-prefix=CHECK2 %s -! RUN: %f18 -fget-definition 14 3 4 -fparse-only %s | FileCheck --check-prefix=CHECK3 %s +! RUN: %f18 -fget-definition 6 17 18 -fsyntax-only %s | FileCheck --check-prefix=CHECK1 %s +! RUN: %f18 -fget-definition 7 20 23 -fsyntax-only %s | FileCheck --check-prefix=CHECK2 %s +! RUN: %f18 -fget-definition 14 3 4 -fsyntax-only %s | FileCheck --check-prefix=CHECK3 %s ! CHECK1: x:{{.*}}getdefinition01.f90, 5, 21-22 ! CHECK2: yyy:{{.*}}getdefinition01.f90, 5, 24-27 ! CHECK3: x:{{.*}}getdefinition01.f90, 13, 24-25 -! RUN: not %f18 -fget-definition -fparse-only %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s +! RUN: not %f18 -fget-definition -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s ! CHECK-ERROR: Invalid argument to -fget-definitions diff --git a/flang/test/Semantics/getdefinition02.f b/flang/test/Semantics/getdefinition02.f index ee7a96750da5..b32536968dc0 100644 --- a/flang/test/Semantics/getdefinition02.f +++ b/flang/test/Semantics/getdefinition02.f @@ -17,9 +17,9 @@ end module ! RUN and CHECK lines here as test is sensitive to line numbers -! RUN: %f18 -fget-definition 7 9 10 -fparse-only %s 2>&1 | FileCheck --check-prefix=CHECK1 %s -! RUN: %f18 -fget-definition 8 26 29 -fparse-only %s 2>&1 | FileCheck --check-prefix=CHECK2 %s -! RUN: %f18 -fget-definition 15 9 10 -fparse-only %s 2>&1 | FileCheck --check-prefix=CHECK3 %s +! RUN: %f18 -fget-definition 7 9 10 -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK1 %s +! RUN: %f18 -fget-definition 8 26 29 -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK2 %s +! RUN: %f18 -fget-definition 15 9 10 -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK3 %s ! CHECK1: x:{{.*}}getdefinition02.f, 5, 27-28 ! CHECK2: yyy:{{.*}}getdefinition02.f, 5, 30-33 ! CHECK3: x:{{.*}}getdefinition02.f, 14, 30-31 diff --git a/flang/test/Semantics/getdefinition03-a.f90 b/flang/test/Semantics/getdefinition03-a.f90 index ecf8c9b48389..6e61637b3546 100644 --- a/flang/test/Semantics/getdefinition03-a.f90 +++ b/flang/test/Semantics/getdefinition03-a.f90 @@ -7,7 +7,7 @@ program main x = f end program -! RUN: %f18 -fget-definition 7 6 7 -fparse-only %s | FileCheck --check-prefix=CHECK1 %s -! RUN: %f18 -fget-definition 7 2 3 -fparse-only %s | FileCheck --check-prefix=CHECK2 %s +! RUN: %f18 -fget-definition 7 6 7 -fsyntax-only %s | FileCheck --check-prefix=CHECK1 %s +! RUN: %f18 -fget-definition 7 2 3 -fsyntax-only %s | FileCheck --check-prefix=CHECK2 %s ! CHECK1: f:{{.*}}getdefinition03-b.f90, 2, 12-13 ! CHECK2: x:{{.*}}getdefinition03-a.f90, 6, 13-14 diff --git a/flang/test/Semantics/getdefinition04.f90 b/flang/test/Semantics/getdefinition04.f90 index 72429599595b..bc01f7979b42 100644 --- a/flang/test/Semantics/getdefinition04.f90 +++ b/flang/test/Semantics/getdefinition04.f90 @@ -6,5 +6,5 @@ program main x = y end program -! RUN: %f18 -fget-definition 6 3 4 -fparse-only %s | FileCheck %s +! RUN: %f18 -fget-definition 6 3 4 -fsyntax-only %s | FileCheck %s ! CHECK: x:{{.*}}getdefinition04.f90, 3, 14-15 diff --git a/flang/test/Semantics/getdefinition05.f90 b/flang/test/Semantics/getdefinition05.f90 index 315e9570d352..91952bb7fcc3 100644 --- a/flang/test/Semantics/getdefinition05.f90 +++ b/flang/test/Semantics/getdefinition05.f90 @@ -12,8 +12,8 @@ program main end program !! Inner x -! RUN: %f18 -fget-definition 9 5 6 -fparse-only %s | FileCheck --check-prefix=CHECK1 %s +! RUN: %f18 -fget-definition 9 5 6 -fsyntax-only %s | FileCheck --check-prefix=CHECK1 %s ! CHECK1: x:{{.*}}getdefinition05.f90, 7, 16-17 !! Outer y -! RUN: %f18 -fget-definition 11 7 8 -fparse-only %s | FileCheck --check-prefix=CHECK2 %s +! RUN: %f18 -fget-definition 11 7 8 -fsyntax-only %s | FileCheck --check-prefix=CHECK2 %s ! CHECK2: y:{{.*}}getdefinition05.f90, 5, 14-15 diff --git a/flang/test/Semantics/getsymbols01.f90 b/flang/test/Semantics/getsymbols01.f90 index bdb7bf053823..d26aa774ace4 100644 --- a/flang/test/Semantics/getsymbols01.f90 +++ b/flang/test/Semantics/getsymbols01.f90 @@ -15,7 +15,7 @@ contains end function end module -! RUN: %f18 -fget-symbols-sources -fparse-only %s 2>&1 | FileCheck %s +! RUN: %f18 -fget-symbols-sources -fsyntax-only %s 2>&1 | FileCheck %s ! CHECK-COUNT-1:f:{{.*}}getsymbols01.f90, 12, 26-27 ! CHECK-COUNT-1:mm1:{{.*}}getsymbols01.f90, 2, 8-11 ! CHECK-COUNT-1:s:{{.*}}getsymbols01.f90, 5, 18-19 diff --git a/flang/test/Semantics/getsymbols02.f90 b/flang/test/Semantics/getsymbols02.f90 index 0119ab16daa8..1667548f81c3 100644 --- a/flang/test/Semantics/getsymbols02.f90 +++ b/flang/test/Semantics/getsymbols02.f90 @@ -7,8 +7,8 @@ PROGRAM helloworld i = callget5() ENDPROGRAM -! RUN: %f18 -fparse-only %S/Inputs/getsymbols02-a.f90 -! RUN: %f18 -fparse-only %S/Inputs/getsymbols02-b.f90 -! RUN: %f18 -fget-symbols-sources -fparse-only %s 2>&1 | FileCheck %s +! RUN: %f18 -fsyntax-only %S/Inputs/getsymbols02-a.f90 +! RUN: %f18 -fsyntax-only %S/Inputs/getsymbols02-b.f90 +! RUN: %f18 -fget-symbols-sources -fsyntax-only %s 2>&1 | FileCheck %s ! CHECK: callget5: .{{[/\\]}}mm2b.mod, ! CHECK: get5: .{{[/\\]}}mm2a.mod, diff --git a/flang/test/Semantics/getsymbols03-a.f90 b/flang/test/Semantics/getsymbols03-a.f90 index 3cbba425d875..fddf513bcc51 100644 --- a/flang/test/Semantics/getsymbols03-a.f90 +++ b/flang/test/Semantics/getsymbols03-a.f90 @@ -7,7 +7,7 @@ program main x = f end program -! RUN: %f18 -fget-symbols-sources -fparse-only %s 2>&1 | FileCheck %s +! RUN: %f18 -fget-symbols-sources -fsyntax-only %s 2>&1 | FileCheck %s ! CHECK:f:{{.*}}getsymbols03-b.f90, 2, 12-13 ! CHECK:main:{{.*}}getsymbols03-a.f90, 4, 9-13 ! CHECK:mm3:{{.*}}getsymbols03-a.f90, 5, 6-9 diff --git a/flang/test/Semantics/getsymbols04.f90 b/flang/test/Semantics/getsymbols04.f90 index fc9b177abd90..ac8f2d0a7e44 100644 --- a/flang/test/Semantics/getsymbols04.f90 +++ b/flang/test/Semantics/getsymbols04.f90 @@ -6,7 +6,7 @@ program main x = y end program -! RUN: %f18 -fget-symbols-sources -fparse-only %s 2>&1 | FileCheck %s +! RUN: %f18 -fget-symbols-sources -fsyntax-only %s 2>&1 | FileCheck %s ! CHECK:x:{{.*}}getsymbols04.f90, 3, 14-15 ! CHECK:x:{{.*}}getsymbols04.f90, 5, 11-12 ! CHECK:y:{{.*}}getsymbols04.f90, 4, 14-15 diff --git a/flang/test/Semantics/getsymbols05.f90 b/flang/test/Semantics/getsymbols05.f90 index 624f37a74b76..6b07678e42d0 100644 --- a/flang/test/Semantics/getsymbols05.f90 +++ b/flang/test/Semantics/getsymbols05.f90 @@ -9,7 +9,7 @@ program main x = y end program -! RUN: %f18 -fget-symbols-sources -fparse-only %s 2>&1 | FileCheck %s +! RUN: %f18 -fget-symbols-sources -fsyntax-only %s 2>&1 | FileCheck %s ! CHECK:x:{{.*}}getsymbols05.f90, 3, 14-15 ! CHECK:x:{{.*}}getsymbols05.f90, 6, 16-17 ! CHECK:y:{{.*}}getsymbols05.f90, 4, 14-15 diff --git a/flang/test/Semantics/missing_newline.f90 b/flang/test/Semantics/missing_newline.f90 index 6dfafba7db86..82f9c9ceb612 100644 --- a/flang/test/Semantics/missing_newline.f90 +++ b/flang/test/Semantics/missing_newline.f90 @@ -1,4 +1,4 @@ ! RUN: echo -n "end program" > %t.f90 -! RUN: %f18 -fparse-only %t.f90 +! RUN: %f18 -fsyntax-only %t.f90 ! RUN: echo -ne "\rend program" > %t.f90 -! RUN: %f18 -fparse-only %t.f90 +! RUN: %f18 -fsyntax-only %t.f90 diff --git a/flang/test/Semantics/mod-file-rewriter.f90 b/flang/test/Semantics/mod-file-rewriter.f90 index 81252910e690..2856dd6dbdf3 100644 --- a/flang/test/Semantics/mod-file-rewriter.f90 +++ b/flang/test/Semantics/mod-file-rewriter.f90 @@ -1,8 +1,8 @@ ! RUN: rm -fr %t && mkdir %t && cd %t -! RUN: %f18 -fparse-only -fdebug-module-writer %s 2>&1 | FileCheck %s --check-prefix CHECK_CHANGED -! RUN: %f18 -fparse-only -fdebug-module-writer %s 2>&1 | FileCheck %s --check-prefix CHECK_UNCHANGED -! RUN: %f18 -fparse-only -fdebug-module-writer %p/Inputs/mod-file-unchanged.f90 2>&1 | FileCheck %s --check-prefix CHECK_UNCHANGED -! RUN: %f18 -fparse-only -fdebug-module-writer %p/Inputs/mod-file-changed.f90 2>&1 | FileCheck %s --check-prefix CHECK_CHANGED +! RUN: %f18 -fsyntax-only -fdebug-module-writer %s 2>&1 | FileCheck %s --check-prefix CHECK_CHANGED +! RUN: %f18 -fsyntax-only -fdebug-module-writer %s 2>&1 | FileCheck %s --check-prefix CHECK_UNCHANGED +! RUN: %f18 -fsyntax-only -fdebug-module-writer %p/Inputs/mod-file-unchanged.f90 2>&1 | FileCheck %s --check-prefix CHECK_UNCHANGED +! RUN: %f18 -fsyntax-only -fdebug-module-writer %p/Inputs/mod-file-changed.f90 2>&1 | FileCheck %s --check-prefix CHECK_CHANGED module m real :: x(10) diff --git a/flang/test/Semantics/modifiable01.f90 b/flang/test/Semantics/modifiable01.f90 index 391a643e3368..dfa9396565e0 100644 --- a/flang/test/Semantics/modifiable01.f90 +++ b/flang/test/Semantics/modifiable01.f90 @@ -1,4 +1,4 @@ -! RUN: not %f18 -fparse-only %s 2>&1 | FileCheck %s +! RUN: not %f18 -fsyntax-only %s 2>&1 | FileCheck %s ! Test WhyNotModifiable() explanations module prot diff --git a/flang/test/Semantics/offsets01.f90 b/flang/test/Semantics/offsets01.f90 index f5491f7b9438..78183d5cc0fc 100644 --- a/flang/test/Semantics/offsets01.f90 +++ b/flang/test/Semantics/offsets01.f90 @@ -1,4 +1,4 @@ -!RUN: %f18 -fdebug-dump-symbols -fparse-only %s | FileCheck %s +!RUN: %f18 -fdebug-dump-symbols -fsyntax-only %s | FileCheck %s ! Size and alignment of intrinsic types subroutine s1 diff --git a/flang/test/Semantics/offsets02.f90 b/flang/test/Semantics/offsets02.f90 index f2ed1d1dbe71..b76572e1761d 100644 --- a/flang/test/Semantics/offsets02.f90 +++ b/flang/test/Semantics/offsets02.f90 @@ -1,4 +1,4 @@ -!RUN: %f18 -fdebug-dump-symbols -fparse-only %s | FileCheck %s +!RUN: %f18 -fdebug-dump-symbols -fsyntax-only %s | FileCheck %s ! Size and alignment of derived types diff --git a/flang/test/Semantics/offsets03.f90 b/flang/test/Semantics/offsets03.f90 index d28b3694bddd..c1c2de464a01 100644 --- a/flang/test/Semantics/offsets03.f90 +++ b/flang/test/Semantics/offsets03.f90 @@ -1,4 +1,4 @@ -!RUN: %f18 -fdebug-dump-symbols -fparse-only %s | FileCheck %s +!RUN: %f18 -fdebug-dump-symbols -fsyntax-only %s | FileCheck %s ! Size and alignment with EQUIVALENCE and COMMON diff --git a/flang/test/Semantics/oldparam01.f90 b/flang/test/Semantics/oldparam01.f90 index 43f33a52f364..b78869ff4d90 100644 --- a/flang/test/Semantics/oldparam01.f90 +++ b/flang/test/Semantics/oldparam01.f90 @@ -1,4 +1,4 @@ -! RUN: %f18 -falternative-parameter-statement -fdebug-dump-symbols -fparse-only %s 2>&1 | FileCheck %s +! RUN: %f18 -falternative-parameter-statement -fdebug-dump-symbols -fsyntax-only %s 2>&1 | FileCheck %s ! Non-error tests for "old style" PARAMETER statements diff --git a/flang/test/Semantics/oldparam02.f90 b/flang/test/Semantics/oldparam02.f90 index 72ea5c410c7a..fd58988ba0b0 100644 --- a/flang/test/Semantics/oldparam02.f90 +++ b/flang/test/Semantics/oldparam02.f90 @@ -1,4 +1,4 @@ -! RUN: not %f18 -falternative-parameter-statement -fdebug-dump-symbols -fparse-only %s 2>&1 | FileCheck %s +! RUN: not %f18 -falternative-parameter-statement -fdebug-dump-symbols -fsyntax-only %s 2>&1 | FileCheck %s ! Error tests for "old style" PARAMETER statements subroutine subr(x1,x2,x3,x4,x5) diff --git a/flang/test/Semantics/oldparam03.f90 b/flang/test/Semantics/oldparam03.f90 index cbdb07057226..bc80f00a1966 100644 --- a/flang/test/Semantics/oldparam03.f90 +++ b/flang/test/Semantics/oldparam03.f90 @@ -1,4 +1,4 @@ -! RUN: not %f18 -fparse-only %s 2>&1 | FileCheck %s +! RUN: not %f18 -fsyntax-only %s 2>&1 | FileCheck %s ! Ensure that old-style PARAMETER statements are disabled by default. diff --git a/flang/test/Semantics/resolve100.f90 b/flang/test/Semantics/resolve100.f90 index 1e84be24c5f2..52fca54b94f7 100644 --- a/flang/test/Semantics/resolve100.f90 +++ b/flang/test/Semantics/resolve100.f90 @@ -1,4 +1,4 @@ -!RUN: %f18 -fdebug-dump-symbols -fparse-only %s | FileCheck %s +!RUN: %f18 -fdebug-dump-symbols -fsyntax-only %s | FileCheck %s program p ! CHECK: a size=4 offset=0: ObjectEntity type: LOGICAL(4) diff --git a/flang/test/Semantics/rewrite01.f90 b/flang/test/Semantics/rewrite01.f90 index 221994593422..cd5453eee6be 100644 --- a/flang/test/Semantics/rewrite01.f90 +++ b/flang/test/Semantics/rewrite01.f90 @@ -1,4 +1,4 @@ -! RUN: %f18 -fparse-only -fdebug-dump-parse-tree %s 2>&1 | FileCheck %s +! RUN: %f18 -fsyntax-only -fdebug-dump-parse-tree %s 2>&1 | FileCheck %s ! Ensure that READ(CVAR) [, item-list] is corrected when CVAR is a ! character variable so as to be a formatted read from the default ! unit, not an unformatted read from an internal unit (which is not diff --git a/flang/test/Semantics/test_errors.sh b/flang/test/Semantics/test_errors.sh index 5411482e4d3b..10feccb2f9f1 100755 --- a/flang/test/Semantics/test_errors.sh +++ b/flang/test/Semantics/test_errors.sh @@ -2,7 +2,7 @@ # Compile a source file and check errors against those listed in the file. # Change the compiler by setting the F18 environment variable. -F18_OPTIONS="-fparse-only" +F18_OPTIONS="-fsyntax-only" srcdir=$(dirname $0) source $srcdir/common.sh [[ ! -f $src ]] && die "File not found: $src" diff --git a/flang/test/Semantics/test_modfile.sh b/flang/test/Semantics/test_modfile.sh index 9205451c176d..a2aef65a101b 100755 --- a/flang/test/Semantics/test_modfile.sh +++ b/flang/test/Semantics/test_modfile.sh @@ -2,7 +2,7 @@ # Compile a source file and compare generated .mod files against expected. set -e -F18_OPTIONS="-fdebug-resolve-names -fparse-only" +F18_OPTIONS="-fdebug-resolve-names -fsyntax-only" srcdir=$(dirname $0) source $srcdir/common.sh diff --git a/flang/test/Semantics/typeinfo01.f90 b/flang/test/Semantics/typeinfo01.f90 index 834120ccb430..2274896fcd68 100644 --- a/flang/test/Semantics/typeinfo01.f90 +++ b/flang/test/Semantics/typeinfo01.f90 @@ -1,4 +1,4 @@ -!RUN: %f18 -fdebug-dump-symbols -fparse-only %s | FileCheck %s +!RUN: %f18 -fdebug-dump-symbols -fsyntax-only %s | FileCheck %s ! Tests for derived type runtime descriptions module m01 diff --git a/flang/tools/f18-parse-demo/f18-parse-demo.cpp b/flang/tools/f18-parse-demo/f18-parse-demo.cpp index 4ccc65e0631d..2033ef6c3bc2 100644 --- a/flang/tools/f18-parse-demo/f18-parse-demo.cpp +++ b/flang/tools/f18-parse-demo/f18-parse-demo.cpp @@ -87,7 +87,7 @@ struct DriverOptions { bool warnOnNonstandardUsage{false}; // -Mstandard bool warningsAreErrors{false}; // -Werror Fortran::parser::Encoding encoding{Fortran::parser::Encoding::LATIN_1}; - bool parseOnly{false}; + bool syntaxOnly{false}; bool dumpProvenance{false}; bool dumpCookedChars{false}; bool dumpUnparse{false}; @@ -217,7 +217,7 @@ std::string CompileFortran( Fortran::common::LanguageFeature::BackslashEscapes)); return {}; } - if (driver.parseOnly) { + if (driver.syntaxOnly) { return {}; } @@ -369,8 +369,8 @@ int main(int argc, char *const argv[]) { driver.dumpUnparse = true; } else if (arg == "-ftime-parse") { driver.timeParse = true; - } else if (arg == "-fparse-only") { - driver.parseOnly = true; + } else if (arg == "-fparse-only" || arg == "-fsyntax-only") { + driver.syntaxOnly = true; } else if (arg == "-c") { driver.compileOnly = true; } else if (arg == "-o") { @@ -405,7 +405,7 @@ int main(int argc, char *const argv[]) { << " -ed enable fixed form D lines\n" << " -E prescan & preprocess only\n" << " -ftime-parse measure parsing time\n" - << " -fparse-only parse only, no output except messages\n" + << " -fsyntax-only parse only, no output except messages\n" << " -funparse parse & reformat only, no code " "generation\n" << " -fdump-provenance dump the provenance table (no code)\n" diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt index 2e5350aecdc6..41237fe06003 100644 --- a/flang/tools/f18/CMakeLists.txt +++ b/flang/tools/f18/CMakeLists.txt @@ -46,7 +46,7 @@ foreach(filename ${MODULES}) set(depends ${include}/__fortran_builtins.mod) endif() add_custom_command(OUTPUT ${include}/${filename}.mod - COMMAND f18 -fparse-only -I${include} + COMMAND f18 -fsyntax-only -I${include} ${FLANG_SOURCE_DIR}/module/${filename}.f90 WORKING_DIRECTORY ${include} DEPENDS f18 ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${depends} diff --git a/flang/tools/f18/f18.cpp b/flang/tools/f18/f18.cpp index fecd37d49936..4546353fe010 100644 --- a/flang/tools/f18/f18.cpp +++ b/flang/tools/f18/f18.cpp @@ -92,7 +92,7 @@ struct DriverOptions { bool warningsAreErrors{false}; // -Werror bool byteswapio{false}; // -byteswapio Fortran::parser::Encoding encoding{Fortran::parser::Encoding::UTF_8}; - bool parseOnly{false}; + bool syntaxOnly{false}; bool dumpProvenance{false}; bool dumpCookedChars{false}; bool dumpUnparse{false}; @@ -327,7 +327,7 @@ std::string CompileFortran(std::string path, Fortran::parser::Options options, exitStatus = EXIT_FAILURE; } } - if (driver.parseOnly) { + if (driver.syntaxOnly) { return {}; } @@ -544,8 +544,8 @@ int main(int argc, char *const argv[]) { driver.dumpUnparseWithSymbols = true; } else if (arg == "-funparse-typed-exprs-to-f18-fc") { driver.unparseTypedExprsToF18_FC = true; - } else if (arg == "-fparse-only") { - driver.parseOnly = true; + } else if (arg == "-fparse-only" || arg == "-fsyntax-only") { + driver.syntaxOnly = true; } else if (arg == "-c") { driver.compileOnly = true; } else if (arg == "-o") { @@ -649,7 +649,7 @@ int main(int argc, char *const argv[]) { << " -module dir module output directory (default .)\n" << " -flatin interpret source as Latin-1 (ISO 8859-1) " "rather than UTF-8\n" - << " -fparse-only parse only, no output except messages\n" + << " -fsyntax-only parsing and semantics only, no output except messages\n" << " -funparse parse & reformat only, no code " "generation\n" << " -funparse-with-symbols parse, resolve symbols, and unparse\n" </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_gcc_bootstrap/master-arm-bootstrap - Build # 1 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *gcc* in CI configuration tcwg_gcc_bootstrap/master-arm-bootstrap. So far, this commit has regressed CI configurations: - tcwg_gcc_bootstrap/master-arm-bootstrap Culprit: <cut> commit e4f16e9f357a38ec702fb69a0ffab9d292a6af9b Author: Thomas Schwinge <thomas(a)codesourcery.com> Date: Fri Aug 13 17:53:12 2021 +0200 Add more self-tests for 'hash_map' with Value type with non-trivial constructor/destructor ... to document the current behavior. gcc/ * hash-map-tests.c (test_map_of_type_with_ctor_and_dtor): Extend. (test_map_of_type_with_ctor_and_dtor_expand): Add function. (hash_map_tests_c_tests): Call it. </cut> Results regressed to (for first_bad == e4f16e9f357a38ec702fb69a0ffab9d292a6af9b) # reset_artifacts: -10 # true: 0 # build_abe binutils: 1 # First few build errors in logs: # 00:02:51 cc1: internal compiler error: in fail, at selftest.c:47 # 00:02:51 cc1plus: internal compiler error: in fail, at selftest.c:47 # 00:02:51 make[3]: *** [s-selftest-c] Error 1 # 00:02:51 make[3]: *** [s-selftest-c++] Error 1 # 00:02:51 make[2]: *** [all-stage1-gcc] Error 2 # 00:02:51 make[1]: *** [stage1-bubble] Error 2 # 00:02:51 make: *** [all] Error 2 from (for last_good == 602fca427df6c5f7452677cfcdd16a5b9a3ca86a) # reset_artifacts: -10 # true: 0 # build_abe binutils: 1 # build_abe bootstrap: 2 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-arm-bootstrap/1/… Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-arm-bootstrap/1/… Build top page/logs: https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-arm-bootstrap/1/ Configuration details: Reproduce builds: <cut> mkdir investigate-gcc-e4f16e9f357a38ec702fb69a0ffab9d292a6af9b cd investigate-gcc-e4f16e9f357a38ec702fb69a0ffab9d292a6af9b git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-arm-bootstrap/1/… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-arm-bootstrap/1/… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-arm-bootstrap/1/… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_gnu-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /gcc/ ./ ./bisect/baseline/ cd gcc # Reproduce first_bad build git checkout --detach e4f16e9f357a38ec702fb69a0ffab9d292a6af9b ../artifacts/test.sh # Reproduce last_good build git checkout --detach 602fca427df6c5f7452677cfcdd16a5b9a3ca86a ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-arm-bootstrap/1/… Build log: https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-arm-bootstrap/1/… Full commit (up to 1000 lines): <cut> commit e4f16e9f357a38ec702fb69a0ffab9d292a6af9b Author: Thomas Schwinge <thomas(a)codesourcery.com> Date: Fri Aug 13 17:53:12 2021 +0200 Add more self-tests for 'hash_map' with Value type with non-trivial constructor/destructor ... to document the current behavior. gcc/ * hash-map-tests.c (test_map_of_type_with_ctor_and_dtor): Extend. (test_map_of_type_with_ctor_and_dtor_expand): Add function. (hash_map_tests_c_tests): Call it. --- gcc/hash-map-tests.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/gcc/hash-map-tests.c b/gcc/hash-map-tests.c index 5b6b192cd28..257f2be0c26 100644 --- a/gcc/hash-map-tests.c +++ b/gcc/hash-map-tests.c @@ -278,6 +278,156 @@ test_map_of_type_with_ctor_and_dtor () ASSERT_TRUE (val_t::ndefault + val_t::ncopy == val_t::ndtor); } + + + /* Verify basic construction and destruction of Value objects. */ + { + /* Configure, arbitrary. */ + const size_t N_init = 0; + const int N_elem = 28; + + void *a[N_elem]; + for (size_t i = 0; i < N_elem; ++i) + a[i] = &a[i]; + + val_t::ndefault = 0; + val_t::ncopy = 0; + val_t::nassign = 0; + val_t::ndtor = 0; + Map m (N_init); + ASSERT_EQ (val_t::ndefault + + val_t::ncopy + + val_t::nassign + + val_t::ndtor, 0); + + for (int i = 0; i < N_elem; ++i) + { + m.get_or_insert (a[i]); + ASSERT_EQ (val_t::ndefault, 1 + i); + ASSERT_EQ (val_t::ncopy, 0); + ASSERT_EQ (val_t::nassign, 0); + ASSERT_EQ (val_t::ndtor, i); + + m.remove (a[i]); + ASSERT_EQ (val_t::ndefault, 1 + i); + ASSERT_EQ (val_t::ncopy, 0); + ASSERT_EQ (val_t::nassign, 0); + ASSERT_EQ (val_t::ndtor, 1 + i); + } + } +} + +/* Verify aspects of 'hash_table::expand'. */ + +static void +test_map_of_type_with_ctor_and_dtor_expand (bool remove_some_inline) +{ + /* Configure, so that hash table expansion triggers a few times. */ + const size_t N_init = 0; + const int N_elem = 70; + size_t expand_c_expected = 4; + size_t expand_c = 0; + + void *a[N_elem]; + for (size_t i = 0; i < N_elem; ++i) + a[i] = &a[i]; + + typedef hash_map <void *, val_t> Map; + + /* Note that we are starting with a fresh 'Map'. Even if an existing one has + been cleared out completely, there remain 'deleted' elements, and these + would disturb the following logic, where we don't have access to the + actual 'm_n_deleted' value. */ + size_t m_n_deleted = 0; + + val_t::ndefault = 0; + val_t::ncopy = 0; + val_t::nassign = 0; + val_t::ndtor = 0; + Map m (N_init); + + /* In the following, in particular related to 'expand', we're adapting from + the internal logic of 'hash_table', glossing over "some details" not + relevant for this testing here. */ + + /* Per 'hash_table::hash_table'. */ + size_t m_size; + { + unsigned int size_prime_index_ = hash_table_higher_prime_index (N_init); + m_size = prime_tab[size_prime_index_].prime; + } + + int n_expand_moved = 0; + + for (int i = 0; i < N_elem; ++i) + { + size_t elts = m.elements (); + + /* Per 'hash_table::find_slot_with_hash'. */ + size_t m_n_elements = elts + m_n_deleted; + bool expand = m_size * 3 <= m_n_elements * 4; + + m.get_or_insert (a[i]); + if (expand) + { + ++expand_c; + + /* Per 'hash_table::expand'. */ + { + unsigned int nindex = hash_table_higher_prime_index (elts * 2); + m_size = prime_tab[nindex].prime; + } + m_n_deleted = 0; + + /* All non-deleted elements have been moved. */ + n_expand_moved += i; + if (remove_some_inline) + n_expand_moved -= (i + 2) / 3; + } + + ASSERT_EQ (val_t::ndefault, 1 + i); + ASSERT_EQ (val_t::ncopy, n_expand_moved); + ASSERT_EQ (val_t::nassign, 0); + if (remove_some_inline) + ASSERT_EQ (val_t::ndtor, (i + 2) / 3); + else + ASSERT_EQ (val_t::ndtor, 0); + + /* Remove some inline. This never triggers an 'expand' here, but via + 'm_n_deleted' does influence any following one. */ + if (remove_some_inline + && !(i % 3)) + { + m.remove (a[i]); + /* Per 'hash_table::remove_elt_with_hash'. */ + m_n_deleted++; + + ASSERT_EQ (val_t::ndefault, 1 + i); + ASSERT_EQ (val_t::ncopy, n_expand_moved); + ASSERT_EQ (val_t::nassign, 0); + ASSERT_EQ (val_t::ndtor, 1 + (i + 2) / 3); + } + } + ASSERT_EQ (expand_c, expand_c_expected); + + int ndefault = val_t::ndefault; + int ncopy = val_t::ncopy; + int nassign = val_t::nassign; + int ndtor = val_t::ndtor; + + for (int i = 0; i < N_elem; ++i) + { + if (remove_some_inline + && !(i % 3)) + continue; + + m.remove (a[i]); + ++ndtor; + ASSERT_EQ (val_t::ndefault, ndefault); + ASSERT_EQ (val_t::ncopy, ncopy); + ASSERT_EQ (val_t::nassign, nassign); + ASSERT_EQ (val_t::ndtor, ndtor); + } } /* Test calling empty on a hash_map that has a key type with non-zero @@ -309,6 +459,8 @@ hash_map_tests_c_tests () test_map_of_strings_to_int (); test_map_of_int_to_strings (); test_map_of_type_with_ctor_and_dtor (); + test_map_of_type_with_ctor_and_dtor_expand (false); + test_map_of_type_with_ctor_and_dtor_expand (true); test_nonzero_empty_key (); } </cut>

3 years, 10 months

3
2
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tk1/llvm-release-arm-spec2k6-O3_LTO - Build # 6 - Fixed!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O3_LTO. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O3_LTO Culprit: <cut> commit 310b35304cdf5a230c042904655583c5532d3e91 Author: Rong Xu <xur(a)google.com> Date: Tue Feb 16 10:53:38 2021 -0800 [SampleFDO][NFC] Refactor SampleProfile.cpp Refactor SampleProfile.cpp to use the core code in CodeGen. The main changes are: (1) Move SampleProfileLoaderBaseImpl class to a header file. (2) Split SampleCoverageTracker to a head file and a cpp file. (3) Move the common codes (common options and callsiteIsHot()) to the common cpp file. Differential Revision: https://reviews.llvm.org/D96455 </cut> Results regressed to (for first_bad == 310b35304cdf5a230c042904655583c5532d3e91) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_LTO_marm artifacts/build-310b35304cdf5a230c042904655583c5532d3e91/results_id: 1 # 482.sphinx3,sphinx_livepretend_base.default regressed by 106 from (for last_good == cddc53ef088b68586094c9841a76b41bee3994a4) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_LTO_marm artifacts/build-cddc53ef088b68586094c9841a76b41bee3994a4/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of last_good: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O3_LTO/3917 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of first_bad: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O3_LTO/3930 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-310b35304cdf5a230c042904655583c5532d3e91 cd investigate-llvm-310b35304cdf5a230c042904655583c5532d3e91 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach 310b35304cdf5a230c042904655583c5532d3e91 ../artifacts/test.sh # Reproduce last_good build git checkout --detach cddc53ef088b68586094c9841a76b41bee3994a4 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Full commit (up to 1000 lines): <cut> commit 310b35304cdf5a230c042904655583c5532d3e91 Author: Rong Xu <xur(a)google.com> Date: Tue Feb 16 10:53:38 2021 -0800 [SampleFDO][NFC] Refactor SampleProfile.cpp Refactor SampleProfile.cpp to use the core code in CodeGen. The main changes are: (1) Move SampleProfileLoaderBaseImpl class to a header file. (2) Split SampleCoverageTracker to a head file and a cpp file. (3) Move the common codes (common options and callsiteIsHot()) to the common cpp file. Differential Revision: https://reviews.llvm.org/D96455 --- .../llvm/ProfileData/SampleProfileLoaderBaseImpl.h | 862 +++++++++++++++++ .../llvm/ProfileData/SampleProfileLoaderBaseUtil.h | 97 ++ llvm/lib/ProfileData/CMakeLists.txt | 1 + .../ProfileData/SampleProfileLoaderBaseUtil.cpp | 192 ++++ llvm/lib/Transforms/IPO/SampleProfile.cpp | 1001 +------------------- 5 files changed, 1161 insertions(+), 992 deletions(-) diff --git a/llvm/include/llvm/ProfileData/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/ProfileData/SampleProfileLoaderBaseImpl.h new file mode 100644 index 000000000000..f02bacb6edc3 --- /dev/null +++ b/llvm/include/llvm/ProfileData/SampleProfileLoaderBaseImpl.h @@ -0,0 +1,862 @@ +////===- SampleProfileLoadBaseImpl.h - Profile loader base impl --*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file provides the interface for the sampled PGO profile loader base +/// implementation. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_SAMPLEPROFILELOADERIMPL_H +#define LLVM_TRANSFORMS_IPO_SAMPLEPROFILELOADERIMPL_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/ProfileData/SampleProf.h" +#include "llvm/ProfileData/SampleProfReader.h" +#include "llvm/ProfileData/SampleProfileLoaderBaseUtil.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/GenericDomTree.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +using namespace llvm; +using namespace sampleprof; +using ProfileCount = Function::ProfileCount; +namespace sampleprofutil { +bool callsiteIsHot(const SampleCoverageTracker *CT, + const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, + bool ProfAccForSymsInList); +} // namespace sampleprofutil +using namespace sampleprofutil; + +#define DEBUG_TYPE "sample-profile-impl" + +using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>; +using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>; +using Edge = std::pair<const BasicBlock *, const BasicBlock *>; +using EdgeWeightMap = DenseMap<Edge, uint64_t>; +using BlockEdgeMap = + DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>; + +extern cl::opt<unsigned> SampleProfileMaxPropagateIterations; +extern cl::opt<unsigned> SampleProfileRecordCoverage; +extern cl::opt<unsigned> SampleProfileSampleCoverage; +extern cl::opt<bool> NoWarnSampleUnused; + +class SampleProfileLoaderBaseImpl { +public: + SampleProfileLoaderBaseImpl(std::string Name) : Filename(Name) {} + void dump() { Reader->dump(); } + +protected: + friend class SampleCoverageTracker; + + unsigned getFunctionLoc(Function &F); + virtual ErrorOr<uint64_t> getInstWeight(const Instruction &Inst); + ErrorOr<uint64_t> getInstWeightImpl(const Instruction &Inst); + ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB); + mutable DenseMap<const DILocation *, const FunctionSamples *> + DILocation2SampleMap; + virtual const FunctionSamples * + findFunctionSamples(const Instruction &I) const; + void printEdgeWeight(raw_ostream &OS, Edge E); + void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const; + void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB); + bool computeBlockWeights(Function &F); + void findEquivalenceClasses(Function &F); + template <bool IsPostDom> + void findEquivalencesFor(BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants, + DominatorTreeBase<BasicBlock, IsPostDom> *DomTree); + + void propagateWeights(Function &F); + uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); + void buildEdges(Function &F); + bool propagateThroughEdges(Function &F, bool UpdateBlockCount); + void clearFunctionData(); + void computeDominanceAndLoopInfo(Function &F); + bool + computeAndPropagateWeights(Function &F, + const DenseSet<GlobalValue::GUID> &InlinedGUIDs); + void emitCoverageRemarks(Function &F); + + /// Map basic blocks to their computed weights. + /// + /// The weight of a basic block is defined to be the maximum + /// of all the instruction weights in that block. + BlockWeightMap BlockWeights; + + /// Map edges to their computed weights. + /// + /// Edge weights are computed by propagating basic block weights in + /// SampleProfile::propagateWeights. + EdgeWeightMap EdgeWeights; + + /// Set of visited blocks during propagation. + SmallPtrSet<const BasicBlock *, 32> VisitedBlocks; + + /// Set of visited edges during propagation. + SmallSet<Edge, 32> VisitedEdges; + + /// Equivalence classes for block weights. + /// + /// Two blocks BB1 and BB2 are in the same equivalence class if they + /// dominate and post-dominate each other, and they are in the same loop + /// nest. When this happens, the two blocks are guaranteed to execute + /// the same number of times. + EquivalenceClassMap EquivalenceClass; + + /// Dominance, post-dominance and loop information. + std::unique_ptr<DominatorTree> DT; + std::unique_ptr<PostDominatorTree> PDT; + std::unique_ptr<LoopInfo> LI; + + /// Predecessors for each basic block in the CFG. + BlockEdgeMap Predecessors; + + /// Successors for each basic block in the CFG. + BlockEdgeMap Successors; + + /// Profile coverage tracker. + SampleCoverageTracker CoverageTracker; + + /// Profile reader object. + std::unique_ptr<SampleProfileReader> Reader; + + /// Samples collected for the body of this function. + FunctionSamples *Samples = nullptr; + + /// Name of the profile file to load. + std::string Filename; + + /// Profile Summary Info computed from sample profile. + ProfileSummaryInfo *PSI = nullptr; + + /// Optimization Remark Emitter used to emit diagnostic remarks. + OptimizationRemarkEmitter *ORE = nullptr; +}; + +/// Clear all the per-function data used to load samples and propagate weights. +void SampleProfileLoaderBaseImpl::clearFunctionData() { + BlockWeights.clear(); + EdgeWeights.clear(); + VisitedBlocks.clear(); + VisitedEdges.clear(); + EquivalenceClass.clear(); + DT = nullptr; + PDT = nullptr; + LI = nullptr; + Predecessors.clear(); + Successors.clear(); + CoverageTracker.clear(); +} + +#ifndef NDEBUG +/// Print the weight of edge \p E on stream \p OS. +/// +/// \param OS Stream to emit the output to. +/// \param E Edge to print. +void SampleProfileLoaderBaseImpl::printEdgeWeight(raw_ostream &OS, Edge E) { + OS << "weight[" << E.first->getName() << "->" << E.second->getName() + << "]: " << EdgeWeights[E] << "\n"; +} + +/// Print the equivalence class of block \p BB on stream \p OS. +/// +/// \param OS Stream to emit the output to. +/// \param BB Block to print. +void SampleProfileLoaderBaseImpl::printBlockEquivalence(raw_ostream &OS, + const BasicBlock *BB) { + const BasicBlock *Equiv = EquivalenceClass[BB]; + OS << "equivalence[" << BB->getName() + << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n"; +} + +/// Print the weight of block \p BB on stream \p OS. +/// +/// \param OS Stream to emit the output to. +/// \param BB Block to print. +void SampleProfileLoaderBaseImpl::printBlockWeight(raw_ostream &OS, + const BasicBlock *BB) const { + const auto &I = BlockWeights.find(BB); + uint64_t W = (I == BlockWeights.end() ? 0 : I->second); + OS << "weight[" << BB->getName() << "]: " << W << "\n"; +} +#endif + +/// Get the weight for an instruction. +/// +/// The "weight" of an instruction \p Inst is the number of samples +/// collected on that instruction at runtime. To retrieve it, we +/// need to compute the line number of \p Inst relative to the start of its +/// function. We use HeaderLineno to compute the offset. We then +/// look up the samples collected for \p Inst using BodySamples. +/// +/// \param Inst Instruction to query. +/// +/// \returns the weight of \p Inst. +ErrorOr<uint64_t> +SampleProfileLoaderBaseImpl::getInstWeight(const Instruction &Inst) { + return getInstWeightImpl(Inst); +} + +ErrorOr<uint64_t> +SampleProfileLoaderBaseImpl::getInstWeightImpl(const Instruction &Inst) { + const FunctionSamples *FS = findFunctionSamples(Inst); + if (!FS) + return std::error_code(); + + const DebugLoc &DLoc = Inst.getDebugLoc(); + if (!DLoc) + return std::error_code(); + + const DILocation *DIL = DLoc; + uint32_t LineOffset = FunctionSamples::getOffset(DIL); + uint32_t Discriminator = DIL->getBaseDiscriminator(); + ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator); + if (R) { + bool FirstMark = + CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get()); + if (FirstMark) { + ORE->emit([&]() { + OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst); + Remark << "Applied " << ore::NV("NumSamples", *R); + Remark << " samples from profile (offset: "; + Remark << ore::NV("LineOffset", LineOffset); + if (Discriminator) { + Remark << "."; + Remark << ore::NV("Discriminator", Discriminator); + } + Remark << ")"; + return Remark; + }); + } + LLVM_DEBUG(dbgs() << " " << DLoc.getLine() << "." + << DIL->getBaseDiscriminator() << ":" << Inst + << " (line offset: " << LineOffset << "." + << DIL->getBaseDiscriminator() << " - weight: " << R.get() + << ")\n"); + } + return R; +} + +/// Compute the weight of a basic block. +/// +/// The weight of basic block \p BB is the maximum weight of all the +/// instructions in BB. +/// +/// \param BB The basic block to query. +/// +/// \returns the weight for \p BB. +ErrorOr<uint64_t> +SampleProfileLoaderBaseImpl::getBlockWeight(const BasicBlock *BB) { + uint64_t Max = 0; + bool HasWeight = false; + for (auto &I : BB->getInstList()) { + const ErrorOr<uint64_t> &R = getInstWeight(I); + if (R) { + Max = std::max(Max, R.get()); + HasWeight = true; + } + } + return HasWeight ? ErrorOr<uint64_t>(Max) : std::error_code(); +} + +/// Compute and store the weights of every basic block. +/// +/// This populates the BlockWeights map by computing +/// the weights of every basic block in the CFG. +/// +/// \param F The function to query. +bool SampleProfileLoaderBaseImpl::computeBlockWeights(Function &F) { + bool Changed = false; + LLVM_DEBUG(dbgs() << "Block weights\n"); + for (const auto &BB : F) { + ErrorOr<uint64_t> Weight = getBlockWeight(&BB); + if (Weight) { + BlockWeights[&BB] = Weight.get(); + VisitedBlocks.insert(&BB); + Changed = true; + } + LLVM_DEBUG(printBlockWeight(dbgs(), &BB)); + } + + return Changed; +} + +/// Get the FunctionSamples for an instruction. +/// +/// The FunctionSamples of an instruction \p Inst is the inlined instance +/// in which that instruction is coming from. We traverse the inline stack +/// of that instruction, and match it with the tree nodes in the profile. +/// +/// \param Inst Instruction to query. +/// +/// \returns the FunctionSamples pointer to the inlined instance. +const FunctionSamples *SampleProfileLoaderBaseImpl::findFunctionSamples( + const Instruction &Inst) const { + const DILocation *DIL = Inst.getDebugLoc(); + if (!DIL) + return Samples; + + auto it = DILocation2SampleMap.try_emplace(DIL, nullptr); + if (it.second) { + it.first->second = Samples->findFunctionSamples(DIL, Reader->getRemapper()); + } + return it.first->second; +} + +/// Find equivalence classes for the given block. +/// +/// This finds all the blocks that are guaranteed to execute the same +/// number of times as \p BB1. To do this, it traverses all the +/// descendants of \p BB1 in the dominator or post-dominator tree. +/// +/// A block BB2 will be in the same equivalence class as \p BB1 if +/// the following holds: +/// +/// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2 +/// is a descendant of \p BB1 in the dominator tree, then BB2 should +/// dominate BB1 in the post-dominator tree. +/// +/// 2- Both BB2 and \p BB1 must be in the same loop. +/// +/// For every block BB2 that meets those two requirements, we set BB2's +/// equivalence class to \p BB1. +/// +/// \param BB1 Block to check. +/// \param Descendants Descendants of \p BB1 in either the dom or pdom tree. +/// \param DomTree Opposite dominator tree. If \p Descendants is filled +/// with blocks from \p BB1's dominator tree, then +/// this is the post-dominator tree, and vice versa. +template <bool IsPostDom> +void SampleProfileLoaderBaseImpl::findEquivalencesFor( + BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants, + DominatorTreeBase<BasicBlock, IsPostDom> *DomTree) { + const BasicBlock *EC = EquivalenceClass[BB1]; + uint64_t Weight = BlockWeights[EC]; + for (const auto *BB2 : Descendants) { + bool IsDomParent = DomTree->dominates(BB2, BB1); + bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2); + if (BB1 != BB2 && IsDomParent && IsInSameLoop) { + EquivalenceClass[BB2] = EC; + // If BB2 is visited, then the entire EC should be marked as visited. + if (VisitedBlocks.count(BB2)) { + VisitedBlocks.insert(EC); + } + + // If BB2 is heavier than BB1, make BB2 have the same weight + // as BB1. + // + // Note that we don't worry about the opposite situation here + // (when BB2 is lighter than BB1). We will deal with this + // during the propagation phase. Right now, we just want to + // make sure that BB1 has the largest weight of all the + // members of its equivalence set. + Weight = std::max(Weight, BlockWeights[BB2]); + } + } + if (EC == &EC->getParent()->getEntryBlock()) { + BlockWeights[EC] = Samples->getHeadSamples() + 1; + } else { + BlockWeights[EC] = Weight; + } +} + +/// Find equivalence classes. +/// +/// Since samples may be missing from blocks, we can fill in the gaps by setting +/// the weights of all the blocks in the same equivalence class to the same +/// weight. To compute the concept of equivalence, we use dominance and loop +/// information. Two blocks B1 and B2 are in the same equivalence class if B1 +/// dominates B2, B2 post-dominates B1 and both are in the same loop. +/// +/// \param F The function to query. +void SampleProfileLoaderBaseImpl::findEquivalenceClasses(Function &F) { + SmallVector<BasicBlock *, 8> DominatedBBs; + LLVM_DEBUG(dbgs() << "\nBlock equivalence classes\n"); + // Find equivalence sets based on dominance and post-dominance information. + for (auto &BB : F) { + BasicBlock *BB1 = &BB; + + // Compute BB1's equivalence class once. + if (EquivalenceClass.count(BB1)) { + LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1)); + continue; + } + + // By default, blocks are in their own equivalence class. + EquivalenceClass[BB1] = BB1; + + // Traverse all the blocks dominated by BB1. We are looking for + // every basic block BB2 such that: + // + // 1- BB1 dominates BB2. + // 2- BB2 post-dominates BB1. + // 3- BB1 and BB2 are in the same loop nest. + // + // If all those conditions hold, it means that BB2 is executed + // as many times as BB1, so they are placed in the same equivalence + // class by making BB2's equivalence class be BB1. + DominatedBBs.clear(); + DT->getDescendants(BB1, DominatedBBs); + findEquivalencesFor(BB1, DominatedBBs, PDT.get()); + + LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1)); + } + + // Assign weights to equivalence classes. + // + // All the basic blocks in the same equivalence class will execute + // the same number of times. Since we know that the head block in + // each equivalence class has the largest weight, assign that weight + // to all the blocks in that equivalence class. + LLVM_DEBUG( + dbgs() << "\nAssign the same weight to all blocks in the same class\n"); + for (auto &BI : F) { + const BasicBlock *BB = &BI; + const BasicBlock *EquivBB = EquivalenceClass[BB]; + if (BB != EquivBB) + BlockWeights[BB] = BlockWeights[EquivBB]; + LLVM_DEBUG(printBlockWeight(dbgs(), BB)); + } +} + +/// Visit the given edge to decide if it has a valid weight. +/// +/// If \p E has not been visited before, we copy to \p UnknownEdge +/// and increment the count of unknown edges. +/// +/// \param E Edge to visit. +/// \param NumUnknownEdges Current number of unknown edges. +/// \param UnknownEdge Set if E has not been visited before. +/// +/// \returns E's weight, if known. Otherwise, return 0. +uint64_t SampleProfileLoaderBaseImpl::visitEdge(Edge E, + unsigned *NumUnknownEdges, + Edge *UnknownEdge) { + if (!VisitedEdges.count(E)) { + (*NumUnknownEdges)++; + *UnknownEdge = E; + return 0; + } + + return EdgeWeights[E]; +} + +/// Propagate weights through incoming/outgoing edges. +/// +/// If the weight of a basic block is known, and there is only one edge +/// with an unknown weight, we can calculate the weight of that edge. +/// +/// Similarly, if all the edges have a known count, we can calculate the +/// count of the basic block, if needed. +/// +/// \param F Function to process. +/// \param UpdateBlockCount Whether we should update basic block counts that +/// has already been annotated. +/// +/// \returns True if new weights were assigned to edges or blocks. +bool SampleProfileLoaderBaseImpl::propagateThroughEdges(Function &F, + bool UpdateBlockCount) { + bool Changed = false; + LLVM_DEBUG(dbgs() << "\nPropagation through edges\n"); + for (const auto &BI : F) { + const BasicBlock *BB = &BI; + const BasicBlock *EC = EquivalenceClass[BB]; + + // Visit all the predecessor and successor edges to determine + // which ones have a weight assigned already. Note that it doesn't + // matter that we only keep track of a single unknown edge. The + // only case we are interested in handling is when only a single + // edge is unknown (see setEdgeOrBlockWeight). + for (unsigned i = 0; i < 2; i++) { + uint64_t TotalWeight = 0; + unsigned NumUnknownEdges = 0, NumTotalEdges = 0; + Edge UnknownEdge, SelfReferentialEdge, SingleEdge; + + if (i == 0) { + // First, visit all predecessor edges. + NumTotalEdges = Predecessors[BB].size(); + for (auto *Pred : Predecessors[BB]) { + Edge E = std::make_pair(Pred, BB); + TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge); + if (E.first == E.second) + SelfReferentialEdge = E; + } + if (NumTotalEdges == 1) { + SingleEdge = std::make_pair(Predecessors[BB][0], BB); + } + } else { + // On the second round, visit all successor edges. + NumTotalEdges = Successors[BB].size(); + for (auto *Succ : Successors[BB]) { + Edge E = std::make_pair(BB, Succ); + TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge); + } + if (NumTotalEdges == 1) { + SingleEdge = std::make_pair(BB, Successors[BB][0]); + } + } + + // After visiting all the edges, there are three cases that we + // can handle immediately: + // + // - All the edge weights are known (i.e., NumUnknownEdges == 0). + // In this case, we simply check that the sum of all the edges + // is the same as BB's weight. If not, we change BB's weight + // to match. Additionally, if BB had not been visited before, + // we mark it visited. + // + // - Only one edge is unknown and BB has already been visited. + // In this case, we can compute the weight of the edge by + // subtracting the total block weight from all the known + // edge weights. If the edges weight more than BB, then the + // edge of the last remaining edge is set to zero. + // + // - There exists a self-referential edge and the weight of BB is + // known. In this case, this edge can be based on BB's weight. + // We add up all the other known edges and set the weight on + // the self-referential edge as we did in the previous case. + // + // In any other case, we must continue iterating. Eventually, + // all edges will get a weight, or iteration will stop when + // it reaches SampleProfileMaxPropagateIterations. + if (NumUnknownEdges <= 1) { + uint64_t &BBWeight = BlockWeights[EC]; + if (NumUnknownEdges == 0) { + if (!VisitedBlocks.count(EC)) { + // If we already know the weight of all edges, the weight of the + // basic block can be computed. It should be no larger than the sum + // of all edge weights. + if (TotalWeight > BBWeight) { + BBWeight = TotalWeight; + Changed = true; + LLVM_DEBUG(dbgs() << "All edge weights for " << BB->getName() + << " known. Set weight for block: "; + printBlockWeight(dbgs(), BB);); + } + } else if (NumTotalEdges == 1 && + EdgeWeights[SingleEdge] < BlockWeights[EC]) { + // If there is only one edge for the visited basic block, use the + // block weight to adjust edge weight if edge weight is smaller. + EdgeWeights[SingleEdge] = BlockWeights[EC]; + Changed = true; + } + } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) { + // If there is a single unknown edge and the block has been + // visited, then we can compute E's weight. + if (BBWeight >= TotalWeight) + EdgeWeights[UnknownEdge] = BBWeight - TotalWeight; + else + EdgeWeights[UnknownEdge] = 0; + const BasicBlock *OtherEC; + if (i == 0) + OtherEC = EquivalenceClass[UnknownEdge.first]; + else + OtherEC = EquivalenceClass[UnknownEdge.second]; + // Edge weights should never exceed the BB weights it connects. + if (VisitedBlocks.count(OtherEC) && + EdgeWeights[UnknownEdge] > BlockWeights[OtherEC]) + EdgeWeights[UnknownEdge] = BlockWeights[OtherEC]; + VisitedEdges.insert(UnknownEdge); + Changed = true; + LLVM_DEBUG(dbgs() << "Set weight for edge: "; + printEdgeWeight(dbgs(), UnknownEdge)); + } + } else if (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) { + // If a block Weights 0, all its in/out edges should weight 0. + if (i == 0) { + for (auto *Pred : Predecessors[BB]) { + Edge E = std::make_pair(Pred, BB); + EdgeWeights[E] = 0; + VisitedEdges.insert(E); + } + } else { + for (auto *Succ : Successors[BB]) { + Edge E = std::make_pair(BB, Succ); + EdgeWeights[E] = 0; + VisitedEdges.insert(E); + } + } + } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) { + uint64_t &BBWeight = BlockWeights[BB]; + // We have a self-referential edge and the weight of BB is known. + if (BBWeight >= TotalWeight) + EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight; + else + EdgeWeights[SelfReferentialEdge] = 0; + VisitedEdges.insert(SelfReferentialEdge); + Changed = true; + LLVM_DEBUG(dbgs() << "Set self-referential edge weight to: "; + printEdgeWeight(dbgs(), SelfReferentialEdge)); + } + if (UpdateBlockCount && !VisitedBlocks.count(EC) && TotalWeight > 0) { + BlockWeights[EC] = TotalWeight; + VisitedBlocks.insert(EC); + Changed = true; + } + } + } + + return Changed; +} + +/// Build in/out edge lists for each basic block in the CFG. +/// +/// We are interested in unique edges. If a block B1 has multiple +/// edges to another block B2, we only add a single B1->B2 edge. +void SampleProfileLoaderBaseImpl::buildEdges(Function &F) { + for (auto &BI : F) { + BasicBlock *B1 = &BI; + + // Add predecessors for B1. + SmallPtrSet<BasicBlock *, 16> Visited; + if (!Predecessors[B1].empty()) + llvm_unreachable("Found a stale predecessors list in a basic block."); + for (BasicBlock *B2 : predecessors(B1)) + if (Visited.insert(B2).second) + Predecessors[B1].push_back(B2); + + // Add successors for B1. + Visited.clear(); + if (!Successors[B1].empty()) + llvm_unreachable("Found a stale successors list in a basic block."); + for (BasicBlock *B2 : successors(B1)) + if (Visited.insert(B2).second) + Successors[B1].push_back(B2); + } +} + +/// Propagate weights into edges +/// +/// The following rules are applied to every block BB in the CFG: +/// +/// - If BB has a single predecessor/successor, then the weight +/// of that edge is the weight of the block. +/// +/// - If all incoming or outgoing edges are known except one, and the +/// weight of the block is already known, the weight of the unknown +/// edge will be the weight of the block minus the sum of all the known +/// edges. If the sum of all the known edges is larger than BB's weight, +/// we set the unknown edge weight to zero. +/// +/// - If there is a self-referential edge, and the weight of the block is +/// known, the weight for that edge is set to the weight of the block +/// minus the weight of the other incoming edges to that block (if +/// known). +void SampleProfileLoaderBaseImpl::propagateWeights(Function &F) { + bool Changed = true; + unsigned I = 0; + + // If BB weight is larger than its corresponding loop's header BB weight, + // use the BB weight to replace the loop header BB weight. + for (auto &BI : F) { + BasicBlock *BB = &BI; + Loop *L = LI->getLoopFor(BB); + if (!L) { + continue; + } + BasicBlock *Header = L->getHeader(); + if (Header && BlockWeights[BB] > BlockWeights[Header]) { + BlockWeights[Header] = BlockWeights[BB]; + } + } + + // Before propagation starts, build, for each block, a list of + // unique predecessors and successors. This is necessary to handle + // identical edges in multiway branches. Since we visit all blocks and all + // edges of the CFG, it is cleaner to build these lists once at the start + // of the pass. + buildEdges(F); + + // Propagate until we converge or we go past the iteration limit. + while (Changed && I++ < SampleProfileMaxPropagateIterations) { + Changed = propagateThroughEdges(F, false); + } + + // The first propagation propagates BB counts from annotated BBs to unknown + // BBs. The 2nd propagation pass resets edges weights, and use all BB weights + // to propagate edge weights. + VisitedEdges.clear(); + Changed = true; + while (Changed && I++ < SampleProfileMaxPropagateIterations) { + Changed = propagateThroughEdges(F, false); + } + + // The 3rd propagation pass allows adjust annotated BB weights that are + // obviously wrong. + Changed = true; + while (Changed && I++ < SampleProfileMaxPropagateIterations) { + Changed = propagateThroughEdges(F, true); + } +} + +/// Generate branch weight metadata for all branches in \p F. +/// +/// Branch weights are computed out of instruction samples using a +/// propagation heuristic. Propagation proceeds in 3 phases: +/// +/// 1- Assignment of block weights. All the basic blocks in the function +/// are initial assigned the same weight as their most frequently +/// executed instruction. +/// +/// 2- Creation of equivalence classes. Since samples may be missing from +/// blocks, we can fill in the gaps by setting the weights of all the +/// blocks in the same equivalence class to the same weight. To compute +/// the concept of equivalence, we use dominance and loop information. +/// Two blocks B1 and B2 are in the same equivalence class if B1 +/// dominates B2, B2 post-dominates B1 and both are in the same loop. +/// +/// 3- Propagation of block weights into edges. This uses a simple +/// propagation heuristic. The following rules are applied to every +/// block BB in the CFG: +/// +/// - If BB has a single predecessor/successor, then the weight +/// of that edge is the weight of the block. +/// +/// - If all the edges are known except one, and the weight of the +/// block is already known, the weight of the unknown edge will +/// be the weight of the block minus the sum of all the known +/// edges. If the sum of all the known edges is larger than BB's weight, +/// we set the unknown edge weight to zero. +/// +/// - If there is a self-referential edge, and the weight of the block is +/// known, the weight for that edge is set to the weight of the block +/// minus the weight of the other incoming edges to that block (if +/// known). +/// +/// Since this propagation is not guaranteed to finalize for every CFG, we +/// only allow it to proceed for a limited number of iterations (controlled +/// by -sample-profile-max-propagate-iterations). +/// +/// FIXME: Try to replace this propagation heuristic with a scheme +/// that is guaranteed to finalize. A work-list approach similar to +/// the standard value propagation algorithm used by SSA-CCP might +/// work here. +/// +/// \param F The function to query. +/// +/// \returns true if \p F was modified. Returns false, otherwise. +bool SampleProfileLoaderBaseImpl::computeAndPropagateWeights( + Function &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) { + bool Changed = (InlinedGUIDs.size() != 0); + + // Compute basic block weights. + Changed |= computeBlockWeights(F); + + if (Changed) { + // Add an entry count to the function using the samples gathered at the + // function entry. + // Sets the GUIDs that are inlined in the profiled binary. This is used + // for ThinLink to make correct liveness analysis, and also make the IR + // match the profiled binary before annotation. + F.setEntryCount( + ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real), + &InlinedGUIDs); + + // Compute dominance and loop info needed for propagation. + computeDominanceAndLoopInfo(F); + + // Find equivalence classes. + findEquivalenceClasses(F); + + // Propagate weights to all edges. + propagateWeights(F); + } + + return Changed; +} + +void SampleProfileLoaderBaseImpl::emitCoverageRemarks(Function &F) { + // If coverage checking was requested, compute it now. + if (SampleProfileRecordCoverage) { + unsigned Used = CoverageTracker.countUsedRecords(Samples, PSI); + unsigned Total = CoverageTracker.countBodyRecords(Samples, PSI); + unsigned Coverage = CoverageTracker.computeCoverage(Used, Total); + if (Coverage < SampleProfileRecordCoverage) { + F.getContext().diagnose(DiagnosticInfoSampleProfile( + F.getSubprogram()->getFilename(), getFunctionLoc(F), + Twine(Used) + " of " + Twine(Total) + " available profile records (" + + Twine(Coverage) + "%) were applied", + DS_Warning)); + } + } + + if (SampleProfileSampleCoverage) { + uint64_t Used = CoverageTracker.getTotalUsedSamples(); + uint64_t Total = CoverageTracker.countBodySamples(Samples, PSI); + unsigned Coverage = CoverageTracker.computeCoverage(Used, Total); + if (Coverage < SampleProfileSampleCoverage) { + F.getContext().diagnose(DiagnosticInfoSampleProfile( + F.getSubprogram()->getFilename(), getFunctionLoc(F), + Twine(Used) + " of " + Twine(Total) + " available profile samples (" + + Twine(Coverage) + "%) were applied", + DS_Warning)); + } + } +} + +/// Get the line number for the function header. +/// +/// This looks up function \p F in the current compilation unit and +/// retrieves the line number where the function is defined. This is +/// line 0 for all the samples read from the profile file. Every line +/// number is relative to this line. +/// +/// \param F Function object to query. +/// +/// \returns the line number where \p F is defined. If it returns 0, +/// it means that there is no debug information available for \p F. +unsigned SampleProfileLoaderBaseImpl::getFunctionLoc(Function &F) { + if (DISubprogram *S = F.getSubprogram()) + return S->getLine(); + + if (NoWarnSampleUnused) + return 0; + + // If the start of \p F is missing, emit a diagnostic to inform the user + // about the missed opportunity. + F.getContext().diagnose(DiagnosticInfoSampleProfile( + "No debug information found in function " + F.getName() + + ": Function profile not used", + DS_Warning)); + return 0; +} + +void SampleProfileLoaderBaseImpl::computeDominanceAndLoopInfo(Function &F) { + DT.reset(new DominatorTree); + DT->recalculate(F); + + PDT.reset(new PostDominatorTree(F)); + + LI.reset(new LoopInfo); + LI->analyze(*DT); +} + +#undef DEBUG_TYPE + +} // namespace llvm +#endif // LLVM_TRANSFORMS_IPO_SAMPLEPROFILELOADERIMPL_H diff --git a/llvm/include/llvm/ProfileData/SampleProfileLoaderBaseUtil.h b/llvm/include/llvm/ProfileData/SampleProfileLoaderBaseUtil.h new file mode 100644 index 000000000000..37dc8d8187d9 --- /dev/null +++ b/llvm/include/llvm/ProfileData/SampleProfileLoaderBaseUtil.h @@ -0,0 +1,97 @@ +////===- SampleProfileLoadBaseUtil.h - Profile loader util func --*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file provides the utility functions for the sampled PGO loader base +/// implementation. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_SAMPLEPROFILELOADERUTIL_H +#define LLVM_TRANSFORMS_IPO_SAMPLEPROFILELOADERUTIL_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/ProfileData/SampleProf.h" +#include "llvm/Support/CommandLine.h" + +namespace llvm { +using namespace sampleprof; + +extern cl::opt<unsigned> SampleProfileMaxPropagateIterations; +extern cl::opt<unsigned> SampleProfileRecordCoverage; +extern cl::opt<unsigned> SampleProfileSampleCoverage; +extern cl::opt<bool> NoWarnSampleUnused; + +namespace sampleprofutil { + +class SampleCoverageTracker { +public: + bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset, + uint32_t Discriminator, uint64_t Samples); + unsigned computeCoverage(unsigned Used, unsigned Total) const; + unsigned countUsedRecords(const FunctionSamples *FS, + ProfileSummaryInfo *PSI) const; + unsigned countBodyRecords(const FunctionSamples *FS, + ProfileSummaryInfo *PSI) const; + uint64_t getTotalUsedSamples() const { return TotalUsedSamples; } + uint64_t countBodySamples(const FunctionSamples *FS, + ProfileSummaryInfo *PSI) const; + + void clear() { + SampleCoverage.clear(); + TotalUsedSamples = 0; + } + void setProfAccForSymsInList(bool V) { ProfAccForSymsInList = V; } + +private: + using BodySampleCoverageMap = std::map<LineLocation, unsigned>; + using FunctionSamplesCoverageMap = + DenseMap<const FunctionSamples *, BodySampleCoverageMap>; + + /// Coverage map for sampling records. + /// + /// This map keeps a record of sampling records that have been matched to + /// an IR instruction. This is used to detect some form of staleness in + /// profiles (see flag -sample-profile-check-coverage). + /// + /// Each entry in the map corresponds to a FunctionSamples instance. This is + /// another map that counts how many times the sample record at the + /// given location has been used. + FunctionSamplesCoverageMap SampleCoverage; + + /// Number of samples used from the profile. + /// + /// When a sampling record is used for the first time, the samples from + /// that record are added to this accumulator. Coverage is later computed + /// based on the total number of samples available in this function and + /// its callsites. + /// + /// Note that this accumulator tracks samples used from a single function + /// and all the inlined callsites. Strictly, we should have a map of counters + /// keyed by FunctionSamples pointers, but these stats are cleared after + /// every function, so we just need to keep a single counter. + uint64_t TotalUsedSamples = 0; + + // For symbol in profile symbol list, whether to regard their profiles + // to be accurate. This is passed from the SampleLoader instance. + bool ProfAccForSymsInList = false; +}; + +/// Return true if the given callsite is hot wrt to hot cutoff threshold. +bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, + bool ProfAccForSymsInList); + +} // end of namespace sampleprofutil +} // end of namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_SAMPLEPROFILELOADERUTIL_H diff --git a/llvm/lib/ProfileData/CMakeLists.txt b/llvm/lib/ProfileData/CMakeLists.txt index 2a377e4d74d3..4125fac918ab 100644 --- a/llvm/lib/ProfileData/CMakeLists.txt +++ b/llvm/lib/ProfileData/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_component_library(LLVMProfileData InstrProfWriter.cpp ProfileSummaryBuilder.cpp </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tx1/llvm-master-aarch64-spec2k6-O2 - Build # 15 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O2. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O2 Culprit: <cut> commit a0a9c9e188f5b97ff8b74287d1536f57ec5dda54 Author: Sanjay Patel <spatel(a)rotateright.com> Date: Wed Aug 11 12:41:47 2021 -0400 [InstCombine] avoid breaking up min/max (cmp+sel) idioms This is a quick fix for a motivating case that looks like this: https://godbolt.org/z/GeMqzMc38 As noted, we might be able to restore the min/max patterns with select folds, or we just wait for this to become easier with canonicalization to min/max intrinsics. </cut> Results regressed to (for first_bad == a0a9c9e188f5b97ff8b74287d1536f57ec5dda54) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2 artifacts/build-a0a9c9e188f5b97ff8b74287d1536f57ec5dda54/results_id: 1 # 464.h264ref,h264ref_base.default regressed by 106 # 464.h264ref,[.] FastFullPelBlockMotionSearch regressed by 146 from (for last_good == 5bf4ab0e79e1a8552019918a662bdf7af8b3825a) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2 artifacts/build-5bf4ab0e79e1a8552019918a662bdf7af8b3825a/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Results ID of last_good: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O2/3875 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Results ID of first_bad: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O2/3877 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-a0a9c9e188f5b97ff8b74287d1536f57ec5dda54 cd investigate-llvm-a0a9c9e188f5b97ff8b74287d1536f57ec5dda54 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach a0a9c9e188f5b97ff8b74287d1536f57ec5dda54 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 5bf4ab0e79e1a8552019918a662bdf7af8b3825a ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Full commit (up to 1000 lines): <cut> commit a0a9c9e188f5b97ff8b74287d1536f57ec5dda54 Author: Sanjay Patel <spatel(a)rotateright.com> Date: Wed Aug 11 12:41:47 2021 -0400 [InstCombine] avoid breaking up min/max (cmp+sel) idioms This is a quick fix for a motivating case that looks like this: https://godbolt.org/z/GeMqzMc38 As noted, we might be able to restore the min/max patterns with select folds, or we just wait for this to become easier with canonicalization to min/max intrinsics. --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 12 +++++++++--- llvm/test/Transforms/InstCombine/icmp-add.ll | 13 ++++++------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 2e20bca300d3..71037616585c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -5755,9 +5755,6 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { if (Instruction *Res = foldICmpWithDominatingICmp(I)) return Res; - if (Instruction *Res = foldICmpBinOp(I, Q)) - return Res; - if (Instruction *Res = foldICmpUsingKnownBits(I)) return Res; @@ -5803,6 +5800,15 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { } } + // The folds in here may rely on wrapping flags and special constants, so + // they can break up min/max idioms in some cases but not seemingly similar + // patterns. + // FIXME: It may be possible to enhance select folding to make this + // unnecessary. It may also be moot if we canonicalize to min/max + // intrinsics. + if (Instruction *Res = foldICmpBinOp(I, Q)) + return Res; + if (Instruction *Res = foldICmpInstWithConstant(I)) return Res; diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll b/llvm/test/Transforms/InstCombine/icmp-add.ll index 187e0ad1a31b..1750b5685c50 100644 --- a/llvm/test/Transforms/InstCombine/icmp-add.ll +++ b/llvm/test/Transforms/InstCombine/icmp-add.ll @@ -972,7 +972,6 @@ define i1 @slt_offset_nsw(i8 %a, i8 %c) { ret i1 %ov } -; FIXME: ; In the following 4 tests, we could push the inc/dec ; through the min/max, but we should not break up the ; min/max idiom by using different icmp and select @@ -980,9 +979,9 @@ define i1 @slt_offset_nsw(i8 %a, i8 %c) { define i32 @increment_max(i32 %x) { ; CHECK-LABEL: @increment_max( -; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X:%.*]], 1 -; CHECK-NEXT: [[C_INV:%.*]] = icmp slt i32 [[X]], 0 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C_INV]], i32 0, i32 [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -1 +; CHECK-NEXT: [[S:%.*]] = add nsw i32 [[TMP2]], 1 ; CHECK-NEXT: ret i32 [[S]] ; %a = add nsw i32 %x, 1 @@ -1019,9 +1018,9 @@ define i32 @increment_min(i32 %x) { define i32 @decrement_min(i32 %x) { ; CHECK-LABEL: @decrement_min( -; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X:%.*]], -1 -; CHECK-NEXT: [[C_INV:%.*]] = icmp sgt i32 [[X]], 0 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C_INV]], i32 0, i32 [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 1 +; CHECK-NEXT: [[S:%.*]] = add nsw i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[S]] ; %a = add nsw i32 %x, -1 </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_apm/llvm-master-aarch64-spec2k6-Os - Build # 4 - Fixed!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_apm/llvm-master-aarch64-spec2k6-Os. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_apm/llvm-master-aarch64-spec2k6-Os Culprit: <cut> commit dae7adda949993bd96aa50c551dc64ddebba7923 Author: Matt Jacobson <mhjacobson(a)me.com> Date: Fri Aug 6 10:12:00 2021 +0800 [AVR][clang] Pass '-fno-use-init-array' to cc1 as default On AVR, '.ctors' is used, not '.init_array'. Make this the default unless specifically overridden by driver argument. This matches gcc, and it matches the behavior in (e.g.) the NetBSD driver (for certain OS variants). Reviewed by: MaskRay Differential Revision: https://reviews.llvm.org/D107610 </cut> Results regressed to (for first_bad == dae7adda949993bd96aa50c551dc64ddebba7923) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -Os artifacts/build-dae7adda949993bd96aa50c551dc64ddebba7923/results_id: 1 # 400.perlbench,perlbench_base.default regressed by 94393 # 453.povray,povray_base.default regressed by 102 # 470.lbm,lbm_base.default regressed by 103 # 470.lbm,[.] LBM_performStreamCollide regressed by 118 from (for last_good == 66b1e629d89543cb7542c184f7dfb32deee732e1) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -Os artifacts/build-66b1e629d89543cb7542c184f7dfb32deee732e1/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Results ID of last_good: apm_64/tcwg_bmk_llvm_apm/bisect-llvm-master-aarch64-spec2k6-Os/3855 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Results ID of first_bad: apm_64/tcwg_bmk_llvm_apm/bisect-llvm-master-aarch64-spec2k6-Os/3852 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-dae7adda949993bd96aa50c551dc64ddebba7923 cd investigate-llvm-dae7adda949993bd96aa50c551dc64ddebba7923 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach dae7adda949993bd96aa50c551dc64ddebba7923 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 66b1e629d89543cb7542c184f7dfb32deee732e1 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Full commit (up to 1000 lines): <cut> commit dae7adda949993bd96aa50c551dc64ddebba7923 Author: Matt Jacobson <mhjacobson(a)me.com> Date: Fri Aug 6 10:12:00 2021 +0800 [AVR][clang] Pass '-fno-use-init-array' to cc1 as default On AVR, '.ctors' is used, not '.init_array'. Make this the default unless specifically overridden by driver argument. This matches gcc, and it matches the behavior in (e.g.) the NetBSD driver (for certain OS variants). Reviewed by: MaskRay Differential Revision: https://reviews.llvm.org/D107610 --- clang/lib/Driver/ToolChains/AVR.cpp | 10 ++++++++++ clang/lib/Driver/ToolChains/AVR.h | 7 ++++++- clang/test/Driver/avr-toolchain.c | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/ToolChains/AVR.cpp b/clang/lib/Driver/ToolChains/AVR.cpp index 5b097f9b2ed9..18c6f41e22b1 100644 --- a/clang/lib/Driver/ToolChains/AVR.cpp +++ b/clang/lib/Driver/ToolChains/AVR.cpp @@ -370,6 +370,16 @@ void AVRToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, addSystemInclude(DriverArgs, CC1Args, AVRInc); } +void AVRToolChain::addClangTargetOptions( + const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, + Action::OffloadKind DeviceOffloadKind) const { + // By default, use `.ctors` (not `.init_array`), as required by libgcc, which + // runs constructors/destructors on AVR. + if (!DriverArgs.hasFlag(options::OPT_fuse_init_array, + options::OPT_fno_use_init_array, false)) + CC1Args.push_back("-fno-use-init-array"); +} + Tool *AVRToolChain::buildLinker() const { return new tools::AVR::Linker(getTriple(), *this, LinkStdlib); } diff --git a/clang/lib/Driver/ToolChains/AVR.h b/clang/lib/Driver/ToolChains/AVR.h index f612aa691182..2d027957ed76 100644 --- a/clang/lib/Driver/ToolChains/AVR.h +++ b/clang/lib/Driver/ToolChains/AVR.h @@ -11,8 +11,8 @@ #include "Gnu.h" #include "clang/Driver/InputInfo.h" -#include "clang/Driver/ToolChain.h" #include "clang/Driver/Tool.h" +#include "clang/Driver/ToolChain.h" namespace clang { namespace driver { @@ -26,6 +26,11 @@ public: AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; + void + addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args, + Action::OffloadKind DeviceOffloadKind) const override; + protected: Tool *buildLinker() const override; diff --git a/clang/test/Driver/avr-toolchain.c b/clang/test/Driver/avr-toolchain.c index 692063dc2c34..877f650a3d02 100644 --- a/clang/test/Driver/avr-toolchain.c +++ b/clang/test/Driver/avr-toolchain.c @@ -1,7 +1,7 @@ // A basic clang -cc1 command-line. // RUN: %clang %s -### -no-canonical-prefixes -target avr 2>&1 | FileCheck -check-prefix=CC1 %s -// CC1: clang{{.*}} "-cc1" "-triple" "avr" +// CC1: clang{{.*}} "-cc1" "-triple" "avr" {{.*}} "-fno-use-init-array" // RUN: %clang %s -### -no-canonical-prefixes -target avr --sysroot %S/Inputs/basic_avr_tree 2>&1 | FileCheck -check-prefix CC1A %s // CC1A: clang{{.*}} "-cc1" "-triple" "avr" {{.*}} "-internal-isystem" {{".*avr/include"}} </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tk1/llvm-release-arm-spec2k6-O3 - Build # 9 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O3. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O3 Culprit: <cut> commit cd6de0e8de4a5fd558580be4b1a07116914fc8ed Author: Sjoerd Meijer <sjoerd.meijer(a)arm.com> Date: Fri Feb 12 15:15:05 2021 +0000 [TTI] Unify FavorPostInc and FavorBackedgeIndex into getPreferredAddressingMode This refactors shouldFavorPostInc() and shouldFavorBackedgeIndex() into getPreferredAddressingMode() so that we have one interface to steer LSR in generating the preferred addressing mode. Differential Revision: https://reviews.llvm.org/D96600 </cut> Results regressed to (for first_bad == cd6de0e8de4a5fd558580be4b1a07116914fc8ed) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_marm artifacts/build-cd6de0e8de4a5fd558580be4b1a07116914fc8ed/results_id: 1 # 482.sphinx3,sphinx_livepretend_base.default regressed by 103 # 482.sphinx3,[.] vector_gautbl_eval_logs3 regressed by 111 from (for last_good == 4bd5bd40094c7b8b691cf394d813efc48d82acfd) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_marm artifacts/build-4bd5bd40094c7b8b691cf394d813efc48d82acfd/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of last_good: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O3/3835 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of first_bad: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O3/3840 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-cd6de0e8de4a5fd558580be4b1a07116914fc8ed cd investigate-llvm-cd6de0e8de4a5fd558580be4b1a07116914fc8ed git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach cd6de0e8de4a5fd558580be4b1a07116914fc8ed ../artifacts/test.sh # Reproduce last_good build git checkout --detach 4bd5bd40094c7b8b691cf394d813efc48d82acfd ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Full commit (up to 1000 lines): <cut> commit cd6de0e8de4a5fd558580be4b1a07116914fc8ed Author: Sjoerd Meijer <sjoerd.meijer(a)arm.com> Date: Fri Feb 12 15:15:05 2021 +0000 [TTI] Unify FavorPostInc and FavorBackedgeIndex into getPreferredAddressingMode This refactors shouldFavorPostInc() and shouldFavorBackedgeIndex() into getPreferredAddressingMode() so that we have one interface to steer LSR in generating the preferred addressing mode. Differential Revision: https://reviews.llvm.org/D96600 --- llvm/include/llvm/Analysis/TargetTransformInfo.h | 25 ++++++++++++---------- .../llvm/Analysis/TargetTransformInfoImpl.h | 7 +++--- llvm/lib/Analysis/TargetTransformInfo.cpp | 10 ++++----- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 22 ++++++++++--------- llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 4 ++-- .../Target/Hexagon/HexagonTargetTransformInfo.cpp | 5 +++-- .../Target/Hexagon/HexagonTargetTransformInfo.h | 3 ++- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 24 ++++++++++++--------- 8 files changed, 55 insertions(+), 45 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index c3d7d2cc80a4..79303dab92a2 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -638,13 +638,15 @@ public: DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const; - /// \return True is LSR should make efforts to create/preserve post-inc - /// addressing mode expressions. - bool shouldFavorPostInc() const; + enum AddressingModeKind { + AMK_PreIndexed, + AMK_PostIndexed, + AMK_None + }; - /// Return true if LSR should make efforts to generate indexed addressing - /// modes that operate across loop iterations. - bool shouldFavorBackedgeIndex(const Loop *L) const; + /// Return the preferred addressing mode LSR should make efforts to generate. + AddressingModeKind getPreferredAddressingMode(const Loop *L, + ScalarEvolution *SE) const; /// Return true if the target supports masked store. bool isLegalMaskedStore(Type *DataType, Align Alignment) const; @@ -1454,8 +1456,8 @@ public: virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) = 0; - virtual bool shouldFavorPostInc() const = 0; - virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0; + virtual AddressingModeKind + getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0; virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0; virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0; virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0; @@ -1796,9 +1798,10 @@ public: TargetLibraryInfo *LibInfo) override { return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo); } - bool shouldFavorPostInc() const override { return Impl.shouldFavorPostInc(); } - bool shouldFavorBackedgeIndex(const Loop *L) const override { - return Impl.shouldFavorBackedgeIndex(L); + AddressingModeKind + getPreferredAddressingMode(const Loop *L, + ScalarEvolution *SE) const override { + return Impl.getPreferredAddressingMode(L, SE); } bool isLegalMaskedStore(Type *DataType, Align Alignment) override { return Impl.isLegalMaskedStore(DataType, Alignment); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 84de5038df42..a9c9d3cb9f4f 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -209,9 +209,10 @@ public: return false; } - bool shouldFavorPostInc() const { return false; } - - bool shouldFavorBackedgeIndex(const Loop *L) const { return false; } + TTI::AddressingModeKind + getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const { + return TTI::AMK_None; + } bool isLegalMaskedStore(Type *DataType, Align Alignment) const { return false; diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 16992d099e0a..3db4b0b0d553 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -409,12 +409,10 @@ bool TargetTransformInfo::canSaveCmp(Loop *L, BranchInst **BI, return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo); } -bool TargetTransformInfo::shouldFavorPostInc() const { - return TTIImpl->shouldFavorPostInc(); -} - -bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const { - return TTIImpl->shouldFavorBackedgeIndex(L); +TTI::AddressingModeKind +TargetTransformInfo::getPreferredAddressingMode(const Loop *L, + ScalarEvolution *SE) const { + return TTIImpl->getPreferredAddressingMode(L, SE); } bool TargetTransformInfo::isLegalMaskedStore(Type *DataType, diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 80f1f2a2a8f7..8c2a79efc674 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -100,18 +100,20 @@ bool ARMTTIImpl::areInlineCompatible(const Function *Caller, return MatchExact && MatchSubset; } -bool ARMTTIImpl::shouldFavorBackedgeIndex(const Loop *L) const { - if (L->getHeader()->getParent()->hasOptSize()) - return false; +TTI::AddressingModeKind +ARMTTIImpl::getPreferredAddressingMode(const Loop *L, + ScalarEvolution *SE) const { if (ST->hasMVEIntegerOps()) - return false; - return ST->isMClass() && ST->isThumb2() && L->getNumBlocks() == 1; -} + return TTI::AMK_PostIndexed; -bool ARMTTIImpl::shouldFavorPostInc() const { - if (ST->hasMVEIntegerOps()) - return true; - return false; + if (L->getHeader()->getParent()->hasOptSize()) + return TTI::AMK_None; + + if (ST->isMClass() && ST->isThumb2() && + L->getNumBlocks() == 1) + return TTI::AMK_PreIndexed; + + return TTI::AMK_None; } Optional<Instruction *> diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index b8de27101a61..808128929000 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -103,8 +103,8 @@ public: bool enableInterleavedAccessVectorization() { return true; } - bool shouldFavorBackedgeIndex(const Loop *L) const; - bool shouldFavorPostInc() const; + TTI::AddressingModeKind + getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const; /// Floating-point computation using ARMv8 AArch32 Advanced /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index af7bc4682249..89e7df0aa27e 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -80,8 +80,9 @@ void HexagonTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, } } -bool HexagonTTIImpl::shouldFavorPostInc() const { - return true; +AddressingModeKind::getPreferredAddressingMode(const Loop *L, + ScalarEvolution *SE) const { + return AMK_PostIndexed; } /// --- Vector TTI begin --- diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h index dc075d6147b6..ebaa619837f0 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -67,7 +67,8 @@ public: TTI::PeelingPreferences &PP); /// Bias LSR towards creating post-increment opportunities. - bool shouldFavorPostInc() const; + AddressingModeKind getPreferredAddressingMode(const Loop *L, + ScalarEvolution *SE) const; // L1 cache prefetch. unsigned getPrefetchDistance() const override; diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 5dec9b542076..2f90df70a3c3 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1227,13 +1227,15 @@ static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) { /// Tally up interesting quantities from the given register. void Cost::RateRegister(const Formula &F, const SCEV *Reg, SmallPtrSetImpl<const SCEV *> &Regs) { + TTI::AddressingModeKind AMK = TTI->getPreferredAddressingMode(L, SE); + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) { // If this is an addrec for another loop, it should be an invariant // with respect to L since L is the innermost loop (at least // for now LSR only handles innermost loops). if (AR->getLoop() != L) { // If the AddRec exists, consider it's register free and leave it alone. - if (isExistingPhi(AR, *SE) && !TTI->shouldFavorPostInc()) + if (isExistingPhi(AR, *SE) && AMK != TTI::AMK_PostIndexed) return; // It is bad to allow LSR for current loop to add induction variables @@ -1254,13 +1256,11 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg, // If the step size matches the base offset, we could use pre-indexed // addressing. - if (TTI->shouldFavorBackedgeIndex(L)) { + if (AMK == TTI::AMK_PreIndexed) { if (auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE))) if (Step->getAPInt() == F.BaseOffset) LoopCost = 0; - } - - if (TTI->shouldFavorPostInc()) { + } else if (AMK == TTI::AMK_PostIndexed) { const SCEV *LoopStep = AR->getStepRecurrence(*SE); if (isa<SCEVConstant>(LoopStep)) { const SCEV *LoopStart = AR->getStart(); @@ -3575,7 +3575,8 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, // may generate a post-increment operator. The reason is that the // reassociations cause extra base+register formula to be created, // and possibly chosen, but the post-increment is more efficient. - if (TTI.shouldFavorPostInc() && mayUsePostIncMode(TTI, LU, BaseReg, L, SE)) + TTI::AddressingModeKind AMK = TTI.getPreferredAddressingMode(L, &SE); + if (AMK == TTI::AMK_PostIndexed && mayUsePostIncMode(TTI, LU, BaseReg, L, SE)) return; SmallVector<const SCEV *, 8> AddOps; const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE); @@ -4239,7 +4240,8 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm; if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, NewF)) { - if (TTI.shouldFavorPostInc() && + if (TTI.getPreferredAddressingMode(this->L, &SE) == + TTI::AMK_PostIndexed && mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE)) continue; if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm)) @@ -4679,7 +4681,7 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() { /// If we are over the complexity limit, filter out any post-inc prefering /// variables to only post-inc values. void LSRInstance::NarrowSearchSpaceByFilterPostInc() { - if (!TTI.shouldFavorPostInc()) + if (TTI.getPreferredAddressingMode(L, &SE) != TTI::AMK_PostIndexed) return; if (EstimateSearchSpaceComplexity() < ComplexityLimit) return; @@ -4978,7 +4980,8 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution, // This can sometimes (notably when trying to favour postinc) lead to // sub-optimial decisions. There it is best left to the cost modelling to // get correct. - if (!TTI.shouldFavorPostInc() || LU.Kind != LSRUse::Address) { + if (TTI.getPreferredAddressingMode(L, &SE) != TTI::AMK_PostIndexed || + LU.Kind != LSRUse::Address) { int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size()); for (const SCEV *Reg : ReqRegs) { if ((F.ScaledReg && F.ScaledReg == Reg) || @@ -5560,7 +5563,8 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU) : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L), MSSAU(MSSAU), FavorBackedgeIndex(EnableBackedgeIndexing && - TTI.shouldFavorBackedgeIndex(L)) { + TTI.getPreferredAddressingMode(L, &SE) == + TTI::AMK_PreIndexed) { // If LoopSimplify form is not available, stay out of trouble. if (!L->isLoopSimplifyForm()) return; </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_kernel/llvm-master-aarch64-lts-allmodconfig - Build # 6 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *linux* in CI configuration tcwg_kernel/llvm-master-aarch64-lts-allmodconfig. So far, this commit has regressed CI configurations: - tcwg_kernel/llvm-master-aarch64-lts-allmodconfig Culprit: <cut> commit 132a8267adabd645476b542b3b132c1b91988fe8 Author: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> Date: Thu Aug 12 13:22:21 2021 +0200 Linux 5.10.58 Link: https://lore.kernel.org/r/20210810172955.660225700@linuxfoundation.org Tested-by: Fox Chen <foxhlchen(a)gmail.com> Tested-by: Hulk Robot <hulkrobot(a)huawei.com> Tested-by: Sudip Mukherjee <sudip.mukherjee(a)codethink.co.uk> Tested-by: Linux Kernel Functional Testing <lkft(a)linaro.org> Tested-by: Guenter Roeck <linux(a)roeck-us.net> Tested-by: Shuah Khan <skhan(a)linuxfoundation.org> Tested-by: Aakash Hemadri <aakashhemadri123(a)gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> </cut> Results regressed to (for first_bad == 132a8267adabd645476b542b3b132c1b91988fe8) # reset_artifacts: -10 # build_abe binutils: -9 # build_llvm: -5 # build_abe qemu: -2 # linux_n_obj: 28702 # linux build successful: all # First few build errors in logs: from (for last_good == 3d7d1b0f5f41d66a2d177f9fdcdb32e23a4b2513) # reset_artifacts: -10 # build_abe binutils: -9 # build_llvm: -5 # build_abe qemu: -2 # linux_n_obj: 28702 # linux build successful: all # linux boot successful: boot Artifacts of last_good build: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-aarch64-lts-a… Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-aarch64-lts-a… Build top page/logs: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-aarch64-lts-a… Configuration details: Reproduce builds: <cut> mkdir investigate-linux-132a8267adabd645476b542b3b132c1b91988fe8 cd investigate-linux-132a8267adabd645476b542b3b132c1b91988fe8 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-aarch64-lts-a… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-aarch64-lts-a… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-aarch64-lts-a… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_kernel-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /linux/ ./ ./bisect/baseline/ cd linux # Reproduce first_bad build git checkout --detach 132a8267adabd645476b542b3b132c1b91988fe8 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 3d7d1b0f5f41d66a2d177f9fdcdb32e23a4b2513 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-aarch64-lts-a… Build log: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-aarch64-lts-a… Full commit (up to 1000 lines): <cut> commit 132a8267adabd645476b542b3b132c1b91988fe8 Author: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> Date: Thu Aug 12 13:22:21 2021 +0200 Linux 5.10.58 Link: https://lore.kernel.org/r/20210810172955.660225700@linuxfoundation.org Tested-by: Fox Chen <foxhlchen(a)gmail.com> Tested-by: Hulk Robot <hulkrobot(a)huawei.com> Tested-by: Sudip Mukherjee <sudip.mukherjee(a)codethink.co.uk> Tested-by: Linux Kernel Functional Testing <lkft(a)linaro.org> Tested-by: Guenter Roeck <linux(a)roeck-us.net> Tested-by: Shuah Khan <skhan(a)linuxfoundation.org> Tested-by: Aakash Hemadri <aakashhemadri123(a)gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e9621a90e752..232dee1140c1 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 57 +SUBLEVEL = 58 EXTRAVERSION = NAME = Dare mighty things </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tx1/llvm-master-aarch64-spec2k6-O2_LTO - Build # 22 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O2_LTO. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O2_LTO Culprit: <cut> commit 4389a413e2129d7d55ee779638b649aa852b6f8a Author: Zahira Ammarguellat <zahira.ammarguellat(a)intel.com> Date: Fri Aug 6 12:01:47 2021 -0700 Revert "[clang][fpenv][patch] Change clang option -ffp-model=precise to select ffp-contract=on" This reverts commit 48ad446a0fb2c9b98cb7047e4daf8a84c29cef8f. </cut> Results regressed to (for first_bad == 4389a413e2129d7d55ee779638b649aa852b6f8a) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2_LTO artifacts/build-4389a413e2129d7d55ee779638b649aa852b6f8a/results_id: 1 # 444.namd,namd_base.default regressed by 104 # 447.dealII,dealII_base.default regressed by 105 from (for last_good == dfce2909ee1ea1523ec27b834a0e56429e9c2beb) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2_LTO artifacts/build-dfce2909ee1ea1523ec27b834a0e56429e9c2beb/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Results ID of last_good: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O2_LTO/3823 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Results ID of first_bad: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O2_LTO/3819 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-4389a413e2129d7d55ee779638b649aa852b6f8a cd investigate-llvm-4389a413e2129d7d55ee779638b649aa852b6f8a git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach 4389a413e2129d7d55ee779638b649aa852b6f8a ../artifacts/test.sh # Reproduce last_good build git checkout --detach dfce2909ee1ea1523ec27b834a0e56429e9c2beb ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Full commit (up to 1000 lines): <cut> commit 4389a413e2129d7d55ee779638b649aa852b6f8a Author: Zahira Ammarguellat <zahira.ammarguellat(a)intel.com> Date: Fri Aug 6 12:01:47 2021 -0700 Revert "[clang][fpenv][patch] Change clang option -ffp-model=precise to select ffp-contract=on" This reverts commit 48ad446a0fb2c9b98cb7047e4daf8a84c29cef8f. --- clang/docs/UsersManual.rst | 48 ++----------------------- clang/lib/Driver/ToolChains/Clang.cpp | 33 ++++++++--------- clang/test/CodeGen/ffp-contract-option.c | 47 +++--------------------- clang/test/CodeGen/ppc-emmintrin.c | 4 +-- clang/test/CodeGen/ppc-xmmintrin.c | 4 +-- clang/test/Driver/fp-model.c | 61 +++++++++++++++----------------- 6 files changed, 58 insertions(+), 139 deletions(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 838669794ea8..980d0ab45975 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1260,50 +1260,8 @@ installed. Controlling Floating Point Behavior ----------------------------------- -Clang provides a number of ways to control floating point behavior, including -with command line options and source pragmas. This section -describes the various floating point semantic modes and the corresponding options. - -.. csv-table:: Floating Point Semantic Modes - :header: "Mode", "Values" - :widths: 15, 30, 30 - - "except_behavior", "{ignore, strict, may_trap}", "ffp-exception-behavior" - "fenv_access", "{off, on}", "(none)" - "rounding_mode", "{dynamic, tonearest, downward, upward, towardzero}", "frounding-math" - "contract", "{on, off, fast}", "ffp-contract" - "denormal_fp_math", "{IEEE, PreserveSign, PositiveZero}", "fdenormal-fp-math" - "denormal_fp32_math", "{IEEE, PreserveSign, PositiveZero}", "fdenormal-fp-math-fp32" - "support_math_errno", "{on, off}", "fmath-errno" - "no_honor_nans", "{on, off}", "fhonor-nans" - "no_honor_infinities", "{on, off}", "fhonor-infinities" - "no_signed_zeros", "{on, off}", "fsigned-zeros" - "allow_reciprocal", "{on, off}", "freciprocal-math" - "allow_approximate_fns", "{on, off}", "(none)" - "allow_reassociation", "{on, off}", "fassociative-math" - - -This table describes the option settings that correspond to the three -floating point semantic models: precise (the default), strict, and fast. - - -.. csv-table:: Floating Point Models - :header: "Mode", "Precise", "Strict", "Fast" - :widths: 25, 15, 15, 15 - - "except_behavior", "ignore", "strict", "ignore" - "fenv_access", "off", "on", "off" - "rounding_mode", "tonearest", "dynamic", "tonearest" - "contract", "on", "off", "fast" - "denormal_fp_math", "IEEE", "IEEE", "PreserveSign" - "denormal_fp32_math", "IEEE","IEEE", "PreserveSign" - "support_math_errno", "on", "on", "off" - "no_honor_nans", "off", "off", "on" - "no_honor_infinities", "off", "off", "on" - "no_signed_zeros", "off", "off", "on" - "allow_reciprocal", "off", "off", "on" - "allow_approximate_fns", "off", "off", "on" - "allow_reassociation", "off", "off", "on" +Clang provides a number of ways to control floating point behavior. The options +are listed below. .. option:: -ffast-math @@ -1498,7 +1456,7 @@ Note that floating-point operations performed as part of constant initialization and ``fast``. Details: - * ``precise`` Disables optimizations that are not value-safe on floating-point data, although FP contraction (FMA) is enabled (``-ffp-contract=on``). This is the default behavior. + * ``precise`` Disables optimizations that are not value-safe on floating-point data, although FP contraction (FMA) is enabled (``-ffp-contract=fast``). This is the default behavior. * ``strict`` Enables ``-frounding-math`` and ``-ffp-exception-behavior=strict``, and disables contractions (FMA). All of the ``-ffast-math`` enablements are disabled. Enables ``STDC FENV_ACCESS``: by default ``FENV_ACCESS`` is disabled. This option setting behaves as though ``#pragma STDC FENV_ACESS ON`` appeared at the top of the source file. * ``fast`` Behaves identically to specifying both ``-ffast-math`` and ``ffp-contract=fast`` diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 1c79640be80f..96bbc0250126 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2641,7 +2641,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, llvm::DenormalMode DenormalFPMath = DefaultDenormalFPMath; llvm::DenormalMode DenormalFP32Math = DefaultDenormalFP32Math; - StringRef FPContract = "on"; + StringRef FPContract = ""; bool StrictFPModel = false; @@ -2666,7 +2666,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, ReciprocalMath = false; SignedZeros = true; // -fno_fast_math restores default denormal and fpcontract handling - FPContract = "on"; + FPContract = ""; DenormalFPMath = llvm::DenormalMode::getIEEE(); // FIXME: The target may have picked a non-IEEE default mode here based on @@ -2686,18 +2686,20 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, // ffp-model= is a Driver option, it is entirely rewritten into more // granular options before being passed into cc1. // Use the gcc option in the switch below. - if (!FPModel.empty() && !FPModel.equals(Val)) + if (!FPModel.empty() && !FPModel.equals(Val)) { D.Diag(clang::diag::warn_drv_overriding_flag_option) << Args.MakeArgString("-ffp-model=" + FPModel) << Args.MakeArgString("-ffp-model=" + Val); + FPContract = ""; + } if (Val.equals("fast")) { optID = options::OPT_ffast_math; FPModel = Val; - FPContract = Val; + FPContract = "fast"; } else if (Val.equals("precise")) { optID = options::OPT_ffp_contract; FPModel = Val; - FPContract = "on"; + FPContract = "fast"; PreciseFPModel = true; } else if (Val.equals("strict")) { StrictFPModel = true; @@ -2783,11 +2785,9 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, case options::OPT_ffp_contract: { StringRef Val = A->getValue(); if (PreciseFPModel) { - // When -ffp-model=precise is seen on the command line, - // the boolean PreciseFPModel is set to true which indicates - // "the current option is actually PreciseFPModel". The optID - // is changed to OPT_ffp_contract and FPContract is set to "on". - // the argument Val string is "precise": it shouldn't be checked. + // -ffp-model=precise enables ffp-contract=fast as a side effect + // the FPContract value has already been set to a string literal + // and the Val string isn't a pertinent value. ; } else if (Val.equals("fast") || Val.equals("on") || Val.equals("off")) FPContract = Val; @@ -2897,17 +2897,18 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, // -fno_fast_math restores default denormal and fpcontract handling DenormalFPMath = DefaultDenormalFPMath; DenormalFP32Math = llvm::DenormalMode::getIEEE(); - FPContract = "on"; + FPContract = ""; break; } if (StrictFPModel) { // If -ffp-model=strict has been specified on command line but // subsequent options conflict then emit warning diagnostic. - if (HonorINFs && HonorNaNs && !AssociativeMath && !ReciprocalMath && - SignedZeros && TrappingMath && RoundingFPMath && - DenormalFPMath == llvm::DenormalMode::getIEEE() && - DenormalFP32Math == llvm::DenormalMode::getIEEE() && - FPContract.equals("off")) + if (HonorINFs && HonorNaNs && + !AssociativeMath && !ReciprocalMath && + SignedZeros && TrappingMath && RoundingFPMath && + (FPContract.equals("off") || FPContract.empty()) && + DenormalFPMath == llvm::DenormalMode::getIEEE() && + DenormalFP32Math == llvm::DenormalMode::getIEEE()) // OK: Current Arg doesn't conflict with -ffp-model=strict ; else { diff --git a/clang/test/CodeGen/ffp-contract-option.c b/clang/test/CodeGen/ffp-contract-option.c index efc72c2b5461..52b750795940 100644 --- a/clang/test/CodeGen/ffp-contract-option.c +++ b/clang/test/CodeGen/ffp-contract-option.c @@ -1,46 +1,9 @@ -// RUN: %clang_cc1 -O3 -ffp-contract=fast -triple=aarch64-apple-darwin -S -o - %s | FileCheck --check-prefix=CHECK-FMADD %s +// RUN: %clang_cc1 -O3 -ffp-contract=fast -triple=aarch64-apple-darwin -S -o - %s | FileCheck %s // REQUIRES: aarch64-registered-target float fma_test1(float a, float b, float c) { -// CHECK-FMADD: fmadd - float x = a * b; - float y = x + c; - return y; -} - -// RUN: %clang_cc1 -triple=x86_64 %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-DEFAULT %s -// -// RUN: %clang_cc1 -triple=x86_64 -ffp-contract=off %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-DEFAULT %s -// RUN: %clang_cc1 -triple=x86_64 -ffp-contract=on %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-ON %s -// RUN: %clang_cc1 -triple=x86_64 -ffp-contract=fast %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-CONTRACTFAST %s -// -// RUN: %clang_cc1 -triple=x86_64 -ffast-math %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-DEFAULTFAST %s -// RUN: %clang_cc1 -triple=x86_64 -ffast-math -ffp-contract=off %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-DEFAULTFAST %s -// RUN: %clang_cc1 -triple=x86_64 -ffast-math -ffp-contract=on %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-ONFAST %s -// RUN: %clang_cc1 -triple=x86_64 -ffast-math -ffp-contract=fast %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-FASTFAST %s -float mymuladd( float x, float y, float z ) { - return x * y + z; - // CHECK-DEFAULT: = fmul float - // CHECK-DEFAULT: = fadd float - - // CHECK-ON: = call float @llvm.fmuladd.f32 - - // CHECK-CONTRACTFAST: = fmul contract float - // CHECK-CONTRACTFAST: = fadd contract float - - // CHECK-DEFAULTFAST: = fmul reassoc nnan ninf nsz arcp afn float - // CHECK-DEFAULTFAST: = fadd reassoc nnan ninf nsz arcp afn float - - // CHECK-ONFAST: = call reassoc nnan ninf nsz arcp afn float @llvm.fmuladd.f32 - - // CHECK-FASTFAST: = fmul fast float - // CHECK-FASTFAST: = fadd fast float +// CHECK: fmadd + float x = a * b; + float y = x + c; + return y; } diff --git a/clang/test/CodeGen/ppc-emmintrin.c b/clang/test/CodeGen/ppc-emmintrin.c index 4a246ff92d76..fa3801f50a01 100644 --- a/clang/test/CodeGen/ppc-emmintrin.c +++ b/clang/test/CodeGen/ppc-emmintrin.c @@ -2,9 +2,9 @@ // REQUIRES: powerpc-registered-target // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ -// RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ -// RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE // CHECK-BE-DAG: @_mm_movemask_pd.perm_mask = internal constant <4 x i32> <i32 -2139062144, i32 -2139062144, i32 -2139062144, i32 -2139078656>, align 16 // CHECK-BE-DAG: @_mm_shuffle_epi32.permute_selectors = internal constant [4 x i32] [i32 66051, i32 67438087, i32 134810123, i32 202182159], align 4 diff --git a/clang/test/CodeGen/ppc-xmmintrin.c b/clang/test/CodeGen/ppc-xmmintrin.c index a7f6ed6e0e67..d3f18bfbb1e5 100644 --- a/clang/test/CodeGen/ppc-xmmintrin.c +++ b/clang/test/CodeGen/ppc-xmmintrin.c @@ -2,11 +2,11 @@ // REQUIRES: powerpc-registered-target // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ -// RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE // RUN: %clang -x c++ -fsyntax-only -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ -// RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE // RUN: %clang -x c++ -fsyntax-only -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns diff --git a/clang/test/Driver/fp-model.c b/clang/test/Driver/fp-model.c index c6d683e25c0b..5fa9d110dd83 100644 --- a/clang/test/Driver/fp-model.c +++ b/clang/test/Driver/fp-model.c @@ -1,90 +1,88 @@ // Test that incompatible combinations of -ffp-model= options // and other floating point options get a warning diagnostic. +// +// REQUIRES: clang-driver -// RUN: %clang -target x86_64 -### -ffp-model=fast -ffp-contract=off -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=fast -ffp-contract=off -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN %s // WARN: warning: overriding '-ffp-model=fast' option with '-ffp-contract=off' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=fast -ffp-contract=on -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=fast -ffp-contract=on -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN1 %s // WARN1: warning: overriding '-ffp-model=fast' option with '-ffp-contract=on' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fassociative-math -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fassociative-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN2 %s // WARN2: warning: overriding '-ffp-model=strict' option with '-fassociative-math' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -ffast-math -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -ffast-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN3 %s // WARN3: warning: overriding '-ffp-model=strict' option with '-ffast-math' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -ffinite-math-only -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -ffinite-math-only -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN4 %s // WARN4: warning: overriding '-ffp-model=strict' option with '-ffinite-math-only' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -ffp-contract=fast -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -ffp-contract=fast -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN5 %s // WARN5: warning: overriding '-ffp-model=strict' option with '-ffp-contract=fast' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -ffp-contract=fast -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN6 %s -// WARN6: warning: overriding '-ffp-model=strict' option with '-ffp-contract=fast' [-Woverriding-t-option] - -// RUN: %clang -target x86_64 -### -ffp-model=strict -ffp-contract=on -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -ffp-contract=on -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN7 %s // WARN7: warning: overriding '-ffp-model=strict' option with '-ffp-contract=on' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fno-honor-infinities -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fno-honor-infinities -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN8 %s // WARN8: warning: overriding '-ffp-model=strict' option with '-fno-honor-infinities' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fno-honor-nans -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fno-honor-nans -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN9 %s // WARN9: warning: overriding '-ffp-model=strict' option with '-fno-honor-nans' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fno-rounding-math -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fno-rounding-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARNa %s // WARNa: warning: overriding '-ffp-model=strict' option with '-fno-rounding-math' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fno-signed-zeros -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fno-signed-zeros -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARNb %s // WARNb: warning: overriding '-ffp-model=strict' option with '-fno-signed-zeros' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fno-trapping-math -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fno-trapping-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARNc %s // WARNc: warning: overriding '-ffp-model=strict' option with '-fno-trapping-math' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -freciprocal-math -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -freciprocal-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARNd %s // WARNd: warning: overriding '-ffp-model=strict' option with '-freciprocal-math' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -funsafe-math-optimizations -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -funsafe-math-optimizations -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARNe %s // WARNe: warning: overriding '-ffp-model=strict' option with '-funsafe-math-optimizations' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -Ofast -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -Ofast -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARNf %s // WARNf: warning: overriding '-ffp-model=strict' option with '-Ofast' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fdenormal-fp-math=preserve-sign,preserve-sign -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fdenormal-fp-math=preserve-sign,preserve-sign -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN10 %s // WARN10: warning: overriding '-ffp-model=strict' option with '-fdenormal-fp-math=preserve-sign,preserve-sign' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -c %s 2>&1 \ +// RUN: %clang -### -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-NOROUND %s // CHECK-NOROUND: "-cc1" // CHECK-NOROUND: "-fno-rounding-math" -// RUN: %clang -target x86_64 -### -frounding-math -c %s 2>&1 \ +// RUN: %clang -### -frounding-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-ROUND --implicit-check-not ffp-exception-behavior=strict %s // CHECK-ROUND: "-cc1" // CHECK-ROUND: "-frounding-math" -// RUN: %clang -target x86_64 -### -ftrapping-math -c %s 2>&1 \ +// RUN: %clang -### -ftrapping-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-TRAP %s // CHECK-TRAP: "-cc1" // CHECK-TRAP: "-ffp-exception-behavior=strict" -// RUN: %clang -target x86_64 -### -nostdinc -ffp-model=fast -c %s 2>&1 \ +// RUN: %clang -### -nostdinc -ffp-model=fast -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FPM-FAST %s // CHECK-FPM-FAST: "-cc1" // CHECK-FPM-FAST: "-menable-no-infs" @@ -98,35 +96,34 @@ // CHECK-FPM-FAST: "-ffast-math" // CHECK-FPM-FAST: "-ffinite-math-only" -// RUN: %clang -target x86_64 -### -nostdinc -ffp-model=precise -c %s 2>&1 \ +// RUN: %clang -### -nostdinc -ffp-model=precise -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FPM-PRECISE %s // CHECK-FPM-PRECISE: "-cc1" -// CHECK-FPM-PRECISE: "-ffp-contract=on" +// CHECK-FPM-PRECISE: "-ffp-contract=fast" // CHECK-FPM-PRECISE: "-fno-rounding-math" -// RUN: %clang -target x86_64 -### -nostdinc -ffp-model=strict -c %s 2>&1 \ +// RUN: %clang -### -nostdinc -ffp-model=strict -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FPM-STRICT %s // CHECK-FPM-STRICT: "-cc1" -// CHECK-FPM-STRICT: "-fmath-errno" -// CHECK-FPM-STRICT: "-ffp-contract=off" // CHECK-FPM-STRICT: "-frounding-math" // CHECK-FPM-STRICT: "-ffp-exception-behavior=strict" -// RUN: %clang -target x86_64 -### -nostdinc -ffp-exception-behavior=strict -c %s 2>&1 \ +// RUN: %clang -### -nostdinc -ffp-exception-behavior=strict -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FEB-STRICT %s // CHECK-FEB-STRICT: "-cc1" // CHECK-FEB-STRICT: "-fno-rounding-math" // CHECK-FEB-STRICT: "-ffp-exception-behavior=strict" -// RUN: %clang -target x86_64 -### -nostdinc -ffp-exception-behavior=maytrap -c %s 2>&1 \ +// RUN: %clang -### -nostdinc -ffp-exception-behavior=maytrap -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FEB-MAYTRAP %s // CHECK-FEB-MAYTRAP: "-cc1" // CHECK-FEB-MAYTRAP: "-fno-rounding-math" // CHECK-FEB-MAYTRAP: "-ffp-exception-behavior=maytrap" -// RUN: %clang -target x86_64 -### -nostdinc -ffp-exception-behavior=ignore -c %s 2>&1 \ +// RUN: %clang -### -nostdinc -ffp-exception-behavior=ignore -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FEB-IGNORE %s // CHECK-FEB-IGNORE: "-cc1" // CHECK-FEB-IGNORE: "-fno-rounding-math" // CHECK-FEB-IGNORE: "-ffp-exception-behavior=ignore" + </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_gcc_bootstrap/master-aarch64-bootstrap_ubsan - Build # 1 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *gcc* in CI configuration tcwg_gcc_bootstrap/master-aarch64-bootstrap_ubsan. So far, this commit has regressed CI configurations: - tcwg_gcc_bootstrap/master-aarch64-bootstrap_ubsan Culprit: <cut> commit d1819df86fbe42125cccb2fc2959a0bf51e524d6 Author: Jonathan Wright <jonathan.wright(a)arm.com> Date: Mon Aug 16 14:37:18 2021 +0100 aarch64: Remove macros for vld4[q]_lane Neon intrinsics Remove macros for vld4[q]_lane Neon intrinsics. This is a preparatory step before adding new modes for structures of Advanced SIMD vectors. gcc/ChangeLog: 2021-08-16 Jonathan Wright <jonathan.wright(a)arm.com> * config/aarch64/arm_neon.h (__LD4_LANE_FUNC): Delete. (__LD4Q_LANE_FUNC): Likewise. (vld4_lane_u8): Define without macro. (vld4_lane_u16): Likewise. (vld4_lane_u32): Likewise. (vld4_lane_u64): Likewise. (vld4_lane_s8): Likewise. (vld4_lane_s16): Likewise. (vld4_lane_s32): Likewise. (vld4_lane_s64): Likewise. (vld4_lane_f16): Likewise. (vld4_lane_f32): Likewise. (vld4_lane_f64): Likewise. (vld4_lane_p8): Likewise. (vld4_lane_p16): Likewise. (vld4_lane_p64): Likewise. (vld4q_lane_u8): Likewise. (vld4q_lane_u16): Likewise. (vld4q_lane_u32): Likewise. (vld4q_lane_u64): Likewise. (vld4q_lane_s8): Likewise. (vld4q_lane_s16): Likewise. (vld4q_lane_s32): Likewise. (vld4q_lane_s64): Likewise. (vld4q_lane_f16): Likewise. (vld4q_lane_f32): Likewise. (vld4q_lane_f64): Likewise. (vld4q_lane_p8): Likewise. (vld4q_lane_p16): Likewise. (vld4q_lane_p64): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. </cut> Results regressed to (for first_bad == d1819df86fbe42125cccb2fc2959a0bf51e524d6) # reset_artifacts: -10 # true: 0 # build_abe binutils: 1 # First few build errors in logs: # 00:10:53 make[3]: [Makefile:1769: aarch64-unknown-linux-gnu/bits/largefile-config.h] Error 1 (ignored) # 00:10:53 make[3]: [Makefile:1770: aarch64-unknown-linux-gnu/bits/largefile-config.h] Error 1 (ignored) # 00:26:15 /home/tcwg-buildslave/workspace/tcwg_gnu_2/abe/builds/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/gcc-gcc.git~master-stage2/prev-gcc/include/arm_neon.h:21081:11: error: cannot convert ‘float*’ to ‘const int*’ # 00:26:15 /home/tcwg-buildslave/workspace/tcwg_gnu_2/abe/builds/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/gcc-gcc.git~master-stage2/prev-gcc/include/arm_neon.h:21384:9: error: cannot convert ‘long int*’ to ‘const double*’ # 00:26:16 make[3]: *** [Makefile:226: lex.o] Error 1 # 00:26:30 make[2]: *** [Makefile:9758: all-stage2-libcpp] Error 2 # 00:28:15 make[1]: *** [Makefile:25899: stage2-bubble] Error 2 # 00:28:15 make: *** [Makefile:1010: all] Error 2 from (for last_good == 08f83812e5c5fdd9a7a4a1b9e46bb33725185c5a) # reset_artifacts: -10 # true: 0 # build_abe binutils: 1 # build_abe bootstrap_ubsan: 2 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-aarch64-bootstra… Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-aarch64-bootstra… Build top page/logs: https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-aarch64-bootstra… Configuration details: Reproduce builds: <cut> mkdir investigate-gcc-d1819df86fbe42125cccb2fc2959a0bf51e524d6 cd investigate-gcc-d1819df86fbe42125cccb2fc2959a0bf51e524d6 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-aarch64-bootstra… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-aarch64-bootstra… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-aarch64-bootstra… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_gnu-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /gcc/ ./ ./bisect/baseline/ cd gcc # Reproduce first_bad build git checkout --detach d1819df86fbe42125cccb2fc2959a0bf51e524d6 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 08f83812e5c5fdd9a7a4a1b9e46bb33725185c5a ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-aarch64-bootstra… Build log: https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-aarch64-bootstra… Full commit (up to 1000 lines): <cut> commit d1819df86fbe42125cccb2fc2959a0bf51e524d6 Author: Jonathan Wright <jonathan.wright(a)arm.com> Date: Mon Aug 16 14:37:18 2021 +0100 aarch64: Remove macros for vld4[q]_lane Neon intrinsics Remove macros for vld4[q]_lane Neon intrinsics. This is a preparatory step before adding new modes for structures of Advanced SIMD vectors. gcc/ChangeLog: 2021-08-16 Jonathan Wright <jonathan.wright(a)arm.com> * config/aarch64/arm_neon.h (__LD4_LANE_FUNC): Delete. (__LD4Q_LANE_FUNC): Likewise. (vld4_lane_u8): Define without macro. (vld4_lane_u16): Likewise. (vld4_lane_u32): Likewise. (vld4_lane_u64): Likewise. (vld4_lane_s8): Likewise. (vld4_lane_s16): Likewise. (vld4_lane_s32): Likewise. (vld4_lane_s64): Likewise. (vld4_lane_f16): Likewise. (vld4_lane_f32): Likewise. (vld4_lane_f64): Likewise. (vld4_lane_p8): Likewise. (vld4_lane_p16): Likewise. (vld4_lane_p64): Likewise. (vld4q_lane_u8): Likewise. (vld4q_lane_u16): Likewise. (vld4q_lane_u32): Likewise. (vld4q_lane_u64): Likewise. (vld4q_lane_s8): Likewise. (vld4q_lane_s16): Likewise. (vld4q_lane_s32): Likewise. (vld4q_lane_s64): Likewise. (vld4q_lane_f16): Likewise. (vld4q_lane_f32): Likewise. (vld4q_lane_f64): Likewise. (vld4q_lane_p8): Likewise. (vld4q_lane_p16): Likewise. (vld4q_lane_p64): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. --- gcc/config/aarch64/arm_neon.h | 728 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 624 insertions(+), 104 deletions(-) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 29b62988a91..d8b29706a20 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -20856,110 +20856,595 @@ vld3q_lane_p64 (const poly64_t * __ptr, poly64x2x3_t __b, const int __c) /* vld4_lane */ -#define __LD4_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \ - qmode, ptrmode, funcsuffix, signedtype) \ -__extension__ extern __inline intype \ -__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ -vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ -{ \ - __builtin_aarch64_simd_xi __o; \ - largetype __temp; \ - __temp.val[0] = \ - vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \ - __temp.val[1] = \ - vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \ - __temp.val[2] = \ - vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); \ - __temp.val[3] = \ - vcombine_##funcsuffix (__b.val[3], vcreate_##funcsuffix (0)); \ - __o = __builtin_aarch64_set_qregxi##qmode (__o, \ - (signedtype) __temp.val[0], \ - 0); \ - __o = __builtin_aarch64_set_qregxi##qmode (__o, \ - (signedtype) __temp.val[1], \ - 1); \ - __o = __builtin_aarch64_set_qregxi##qmode (__o, \ - (signedtype) __temp.val[2], \ - 2); \ - __o = __builtin_aarch64_set_qregxi##qmode (__o, \ - (signedtype) __temp.val[3], \ - 3); \ - __o = __builtin_aarch64_ld4_lane##mode ( \ - (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ - __b.val[0] = (vectype) __builtin_aarch64_get_dregxidi (__o, 0); \ - __b.val[1] = (vectype) __builtin_aarch64_get_dregxidi (__o, 1); \ - __b.val[2] = (vectype) __builtin_aarch64_get_dregxidi (__o, 2); \ - __b.val[3] = (vectype) __builtin_aarch64_get_dregxidi (__o, 3); \ - return __b; \ +__extension__ extern __inline uint8x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_u8 (const uint8_t * __ptr, uint8x8x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + uint8x16x4_t __temp; + __temp.val[0] = vcombine_u8 (__b.val[0], vcreate_u8 (0)); + __temp.val[1] = vcombine_u8 (__b.val[1], vcreate_u8 (0)); + __temp.val[2] = vcombine_u8 (__b.val[2], vcreate_u8 (0)); + __temp.val[3] = vcombine_u8 (__b.val[3], vcreate_u8 (0)); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev8qi ( + (__builtin_aarch64_simd_qi *) __ptr, __o, __c); + __b.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; } -/* vld4q_lane */ +__extension__ extern __inline uint16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_u16 (const uint16_t * __ptr, uint16x4x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + uint16x8x4_t __temp; + __temp.val[0] = vcombine_u16 (__b.val[0], vcreate_u16 (0)); + __temp.val[1] = vcombine_u16 (__b.val[1], vcreate_u16 (0)); + __temp.val[2] = vcombine_u16 (__b.val[2], vcreate_u16 (0)); + __temp.val[3] = vcombine_u16 (__b.val[3], vcreate_u16 (0)); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev4hi ( + (__builtin_aarch64_simd_hi *) __ptr, __o, __c); + __b.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline uint32x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_u32 (const uint32_t * __ptr, uint32x2x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + uint32x4x4_t __temp; + __temp.val[0] = vcombine_u32 (__b.val[0], vcreate_u32 (0)); + __temp.val[1] = vcombine_u32 (__b.val[1], vcreate_u32 (0)); + __temp.val[2] = vcombine_u32 (__b.val[2], vcreate_u32 (0)); + __temp.val[3] = vcombine_u32 (__b.val[3], vcreate_u32 (0)); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev2si ( + (__builtin_aarch64_simd_si *) __ptr, __o, __c); + __b.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline uint64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_u64 (const uint64_t * __ptr, uint64x1x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + uint64x2x4_t __temp; + __temp.val[0] = vcombine_u64 (__b.val[0], vcreate_u64 (0)); + __temp.val[1] = vcombine_u64 (__b.val[1], vcreate_u64 (0)); + __temp.val[2] = vcombine_u64 (__b.val[2], vcreate_u64 (0)); + __temp.val[3] = vcombine_u64 (__b.val[3], vcreate_u64 (0)); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanedi ( + (__builtin_aarch64_simd_di *) __ptr, __o, __c); + __b.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline int8x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_s8 (const int8_t * __ptr, int8x8x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + int8x16x4_t __temp; + __temp.val[0] = vcombine_s8 (__b.val[0], vcreate_s8 (0)); + __temp.val[1] = vcombine_s8 (__b.val[1], vcreate_s8 (0)); + __temp.val[2] = vcombine_s8 (__b.val[2], vcreate_s8 (0)); + __temp.val[3] = vcombine_s8 (__b.val[3], vcreate_s8 (0)); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev8qi ( + (__builtin_aarch64_simd_qi *) __ptr, __o, __c); + __b.val[0] = (int8x8_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (int8x8_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (int8x8_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (int8x8_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline int16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_s16 (const int16_t * __ptr, int16x4x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + int16x8x4_t __temp; + __temp.val[0] = vcombine_s16 (__b.val[0], vcreate_s16 (0)); + __temp.val[1] = vcombine_s16 (__b.val[1], vcreate_s16 (0)); + __temp.val[2] = vcombine_s16 (__b.val[2], vcreate_s16 (0)); + __temp.val[3] = vcombine_s16 (__b.val[3], vcreate_s16 (0)); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev4hi ( + (__builtin_aarch64_simd_hi *) __ptr, __o, __c); + __b.val[0] = (int16x4_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (int16x4_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (int16x4_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (int16x4_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline int32x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_s32 (const int32_t * __ptr, int32x2x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + int32x4x4_t __temp; + __temp.val[0] = vcombine_s32 (__b.val[0], vcreate_s32 (0)); + __temp.val[1] = vcombine_s32 (__b.val[1], vcreate_s32 (0)); + __temp.val[2] = vcombine_s32 (__b.val[2], vcreate_s32 (0)); + __temp.val[3] = vcombine_s32 (__b.val[3], vcreate_s32 (0)); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev2si ( + (__builtin_aarch64_simd_si *) __ptr, __o, __c); + __b.val[0] = (int32x2_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (int32x2_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (int32x2_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (int32x2_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline int64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_s64 (const int64_t * __ptr, int64x1x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + int64x2x4_t __temp; + __temp.val[0] = vcombine_s64 (__b.val[0], vcreate_s64 (0)); + __temp.val[1] = vcombine_s64 (__b.val[1], vcreate_s64 (0)); + __temp.val[2] = vcombine_s64 (__b.val[2], vcreate_s64 (0)); + __temp.val[3] = vcombine_s64 (__b.val[3], vcreate_s64 (0)); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanedi ( + (__builtin_aarch64_simd_di *) __ptr, __o, __c); + __b.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline float16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_f16 (const float16_t * __ptr, float16x4x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + float16x8x4_t __temp; + __temp.val[0] = vcombine_f16 (__b.val[0], vcreate_f16 (0)); + __temp.val[1] = vcombine_f16 (__b.val[1], vcreate_f16 (0)); + __temp.val[2] = vcombine_f16 (__b.val[2], vcreate_f16 (0)); + __temp.val[3] = vcombine_f16 (__b.val[3], vcreate_f16 (0)); + __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev4hf ( + (__builtin_aarch64_simd_hf *) __ptr, __o, __c); + __b.val[0] = (float16x4_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (float16x4_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (float16x4_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (float16x4_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline float32x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_f32 (const float32_t * __ptr, float32x2x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + float32x4x4_t __temp; + __temp.val[0] = vcombine_f32 (__b.val[0], vcreate_f32 (0)); + __temp.val[1] = vcombine_f32 (__b.val[1], vcreate_f32 (0)); + __temp.val[2] = vcombine_f32 (__b.val[2], vcreate_f32 (0)); + __temp.val[3] = vcombine_f32 (__b.val[3], vcreate_f32 (0)); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev2si ( + (__builtin_aarch64_simd_sf *) __ptr, __o, __c); + __b.val[0] = (float32x2_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (float32x2_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (float32x2_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (float32x2_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline float64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_f64 (const float64_t * __ptr, float64x1x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + float64x2x4_t __temp; + __temp.val[0] = vcombine_f64 (__b.val[0], vcreate_f64 (0)); + __temp.val[1] = vcombine_f64 (__b.val[1], vcreate_f64 (0)); + __temp.val[2] = vcombine_f64 (__b.val[2], vcreate_f64 (0)); + __temp.val[3] = vcombine_f64 (__b.val[3], vcreate_f64 (0)); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanedf ( + (__builtin_aarch64_simd_df *) __ptr, __o, __c); + __b.val[0] = (float64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (float64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (float64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (float64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline poly8x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_p8 (const poly8_t * __ptr, poly8x8x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + poly8x16x4_t __temp; + __temp.val[0] = vcombine_p8 (__b.val[0], vcreate_p8 (0)); + __temp.val[1] = vcombine_p8 (__b.val[1], vcreate_p8 (0)); + __temp.val[2] = vcombine_p8 (__b.val[2], vcreate_p8 (0)); + __temp.val[3] = vcombine_p8 (__b.val[3], vcreate_p8 (0)); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev8qi ( + (__builtin_aarch64_simd_qi *) __ptr, __o, __c); + __b.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} -__LD4_LANE_FUNC (float16x4x4_t, float16x4_t, float16x8x4_t, float16_t, v4hf, - v8hf, hf, f16, float16x8_t) -__LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v2sf, v4sf, - sf, f32, float32x4_t) -__LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, df, v2df, - df, f64, float64x2_t) -__LD4_LANE_FUNC (poly8x8x4_t, poly8x8_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8, - int8x16_t) -__LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, - p16, int16x8_t) -__LD4_LANE_FUNC (poly64x1x4_t, poly64x1_t, poly64x2x4_t, poly64_t, di, - v2di_ssps, di, p64, poly64x2_t) -__LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8, - int8x16_t) -__LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16, - int16x8_t) -__LD4_LANE_FUNC (int32x2x4_t, int32x2_t, int32x4x4_t, int32_t, v2si, v4si, si, s32, - int32x4_t) -__LD4_LANE_FUNC (int64x1x4_t, int64x1_t, int64x2x4_t, int64_t, di, v2di, di, s64, - int64x2_t) -__LD4_LANE_FUNC (uint8x8x4_t, uint8x8_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8, - int8x16_t) -__LD4_LANE_FUNC (uint16x4x4_t, uint16x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, - u16, int16x8_t) -__LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v2si, v4si, si, - u32, int32x4_t) -__LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, di, v2di, di, - u64, int64x2_t) +__extension__ extern __inline poly16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_p16 (const poly16_t * __ptr, poly16x4x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + poly16x8x4_t __temp; + __temp.val[0] = vcombine_p16 (__b.val[0], vcreate_p16 (0)); + __temp.val[1] = vcombine_p16 (__b.val[1], vcreate_p16 (0)); + __temp.val[2] = vcombine_p16 (__b.val[2], vcreate_p16 (0)); + __temp.val[3] = vcombine_p16 (__b.val[3], vcreate_p16 (0)); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev4hi ( + (__builtin_aarch64_simd_hi *) __ptr, __o, __c); + __b.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline poly64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_p64 (const poly64_t * __ptr, poly64x1x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + poly64x2x4_t __temp; + __temp.val[0] = vcombine_p64 (__b.val[0], vcreate_p64 (0)); + __temp.val[1] = vcombine_p64 (__b.val[1], vcreate_p64 (0)); + __temp.val[2] = vcombine_p64 (__b.val[2], vcreate_p64 (0)); + __temp.val[3] = vcombine_p64 (__b.val[3], vcreate_p64 (0)); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanedi ( + (__builtin_aarch64_simd_di *) __ptr, __o, __c); + __b.val[0] = (poly64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (poly64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (poly64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (poly64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} /* vld4q_lane */ -#define __LD4Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \ -__extension__ extern __inline intype \ -__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ -vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ -{ \ - __builtin_aarch64_simd_xi __o; \ - intype ret; \ - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); \ - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); \ - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); \ - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); \ - __o = __builtin_aarch64_ld4_lane##mode ( \ - (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ - ret.val[0] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 0); \ - ret.val[1] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 1); \ - ret.val[2] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 2); \ - ret.val[3] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 3); \ - return ret; \ -} - -__LD4Q_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16) -__LD4Q_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32) -__LD4Q_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64) -__LD4Q_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8) -__LD4Q_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16) -__LD4Q_LANE_FUNC (poly64x2x4_t, poly64x2_t, poly64_t, v2di, di, p64) -__LD4Q_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8) -__LD4Q_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16) -__LD4Q_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32) -__LD4Q_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64) -__LD4Q_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8) -__LD4Q_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16) -__LD4Q_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32) -__LD4Q_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64) +__extension__ extern __inline uint8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_u8 (const uint8_t * __ptr, uint8x16x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + uint8x16x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev16qi ( + (__builtin_aarch64_simd_qi *) __ptr, __o, __c); + ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline uint16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_u16 (const uint16_t * __ptr, uint16x8x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + uint16x8x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev8hi ( + (__builtin_aarch64_simd_hi *) __ptr, __o, __c); + ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline uint32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_u32 (const uint32_t * __ptr, uint32x4x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + uint32x4x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev4si ( + (__builtin_aarch64_simd_si *) __ptr, __o, __c); + ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline uint64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_u64 (const uint64_t * __ptr, uint64x2x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + uint64x2x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev2di ( + (__builtin_aarch64_simd_di *) __ptr, __o, __c); + ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline int8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_s8 (const int8_t * __ptr, int8x16x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + int8x16x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev16qi ( + (__builtin_aarch64_simd_qi *) __ptr, __o, __c); + ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline int16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_s16 (const int16_t * __ptr, int16x8x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + int16x8x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev8hi ( + (__builtin_aarch64_simd_hi *) __ptr, __o, __c); + ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline int32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_s32 (const int32_t * __ptr, int32x4x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + int32x4x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev4si ( + (__builtin_aarch64_simd_si *) __ptr, __o, __c); + ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline int64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_s64 (const int64_t * __ptr, int64x2x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + int64x2x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev2di ( + (__builtin_aarch64_simd_di *) __ptr, __o, __c); + ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline float16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_f16 (const float16_t * __ptr, float16x8x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + float16x8x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev8hf ( + (__builtin_aarch64_simd_hf *) __ptr, __o, __c); + ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (float16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline float32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_f32 (const float32_t * __ptr, float32x4x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + float32x4x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev4sf ( + (__builtin_aarch64_simd_sf *) __ptr, __o, __c); + ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline float64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_f64 (const float64_t * __ptr, float64x2x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + float64x2x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev2df ( + (__builtin_aarch64_simd_di *) __ptr, __o, __c); + ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline poly8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_p8 (const poly8_t * __ptr, poly8x16x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + poly8x16x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev16qi ( + (__builtin_aarch64_simd_qi *) __ptr, __o, __c); + ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline poly16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_p16 (const poly16_t * __ptr, poly16x8x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + poly16x8x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev8hi ( + (__builtin_aarch64_simd_hi *) __ptr, __o, __c); + ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline poly64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_p64 (const poly64_t * __ptr, poly64x2x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + poly64x2x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev2di ( + (__builtin_aarch64_simd_di *) __ptr, __o, __c); + ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (poly64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} /* vmax */ @@ -35441,9 +35926,47 @@ vld3q_lane_bf16 (const bfloat16_t * __ptr, bfloat16x8x3_t __b, const int __c) return ret; } -__LD4_LANE_FUNC (bfloat16x4x4_t, bfloat16x4_t, bfloat16x8x4_t, bfloat16_t, v4bf, - v8bf, bf, bf16, bfloat16x8_t) -__LD4Q_LANE_FUNC (bfloat16x8x4_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16) +__extension__ extern __inline bfloat16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_bf16 (const bfloat16_t * __ptr, bfloat16x4x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + bfloat16x8x4_t __temp; + __temp.val[0] = vcombine_bf16 (__b.val[0], vcreate_bf16 (0)); + __temp.val[1] = vcombine_bf16 (__b.val[1], vcreate_bf16 (0)); + __temp.val[2] = vcombine_bf16 (__b.val[2], vcreate_bf16 (0)); + __temp.val[3] = vcombine_bf16 (__b.val[3], vcreate_bf16 (0)); + __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev4bf ( + (__builtin_aarch64_simd_bf *) __ptr, __o, __c); + __b.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (bfloat16x4_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline bfloat16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_bf16 (const bfloat16_t * __ptr, bfloat16x8x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + bfloat16x8x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev8bf ( + (__builtin_aarch64_simd_bf *) __ptr, __o, __c); + ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -35739,7 +36262,4 @@ vaddq_p128 (poly128_t __a, poly128_t __b) #undef __aarch64_vdupq_laneq_u32 #undef __aarch64_vdupq_laneq_u64 -#undef __LD4_LANE_FUNC -#undef __LD4Q_LANE_FUNC - #endif </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_kernel/llvm-release-arm-next-allmodconfig - Build # 31 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *linux* in CI configuration tcwg_kernel/llvm-release-arm-next-allmodconfig. So far, this commit has regressed CI configurations: - tcwg_kernel/llvm-release-arm-next-allmodconfig Culprit: <cut> commit fad7cd3310db3099f95dd34312c77740fbc455e5 Author: Baokun Li <libaokun1(a)huawei.com> Date: Wed Aug 4 10:12:12 2021 +0800 nbd: add the check to prevent overflow in __nbd_ioctl() If user specify a large enough value of NBD blocks option, it may trigger signed integer overflow which may lead to nbd->config->bytesize becomes a large or small value, zero in particular. UBSAN: Undefined behaviour in drivers/block/nbd.c:325:31 signed integer overflow: 1024 * 4611686155866341414 cannot be represented in type 'long long int' [...] Call trace: [...] handle_overflow+0x188/0x1dc lib/ubsan.c:192 __ubsan_handle_mul_overflow+0x34/0x44 lib/ubsan.c:213 nbd_size_set drivers/block/nbd.c:325 [inline] __nbd_ioctl drivers/block/nbd.c:1342 [inline] nbd_ioctl+0x998/0xa10 drivers/block/nbd.c:1395 __blkdev_driver_ioctl block/ioctl.c:311 [inline] [...] Although it is not a big deal, still silence the UBSAN by limit the input value. Reported-by: Hulk Robot <hulkci(a)huawei.com> Signed-off-by: Baokun Li <libaokun1(a)huawei.com> Reviewed-by: Josef Bacik <josef(a)toxicpanda.com> Link: https://lore.kernel.org/r/20210804021212.990223-1-libaokun1@huawei.com [axboe: dropped unlikely()] Signed-off-by: Jens Axboe <axboe(a)kernel.dk> </cut> Results regressed to (for first_bad == fad7cd3310db3099f95dd34312c77740fbc455e5) # reset_artifacts: -10 # build_abe binutils: -9 # build_llvm: -5 # build_abe qemu: -2 # linux_n_obj: 21709 # First few build errors in logs: # 00:07:12 make[1]: *** [modules-only.symvers] Error 1 # 00:07:12 make: *** [modules] Error 2 from (for last_good == da20b58d5bbbb0d23ae9530992a37d0f0d1787a4) # reset_artifacts: -10 # build_abe binutils: -9 # build_llvm: -5 # build_abe qemu: -2 # linux_n_obj: 29751 # linux build successful: all Artifacts of last_good build: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-release-arm-next-all… Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-release-arm-next-all… Build top page/logs: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-release-arm-next-all… Configuration details: rr[linux_git]="https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git#ecf9343…" Reproduce builds: <cut> mkdir investigate-linux-fad7cd3310db3099f95dd34312c77740fbc455e5 cd investigate-linux-fad7cd3310db3099f95dd34312c77740fbc455e5 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-release-arm-next-all… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-release-arm-next-all… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-release-arm-next-all… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_kernel-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /linux/ ./ ./bisect/baseline/ cd linux # Reproduce first_bad build git checkout --detach fad7cd3310db3099f95dd34312c77740fbc455e5 ../artifacts/test.sh # Reproduce last_good build git checkout --detach da20b58d5bbbb0d23ae9530992a37d0f0d1787a4 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-release-arm-next-all… Build log: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-release-arm-next-all… Full commit (up to 1000 lines): <cut> commit fad7cd3310db3099f95dd34312c77740fbc455e5 Author: Baokun Li <libaokun1(a)huawei.com> Date: Wed Aug 4 10:12:12 2021 +0800 nbd: add the check to prevent overflow in __nbd_ioctl() If user specify a large enough value of NBD blocks option, it may trigger signed integer overflow which may lead to nbd->config->bytesize becomes a large or small value, zero in particular. UBSAN: Undefined behaviour in drivers/block/nbd.c:325:31 signed integer overflow: 1024 * 4611686155866341414 cannot be represented in type 'long long int' [...] Call trace: [...] handle_overflow+0x188/0x1dc lib/ubsan.c:192 __ubsan_handle_mul_overflow+0x34/0x44 lib/ubsan.c:213 nbd_size_set drivers/block/nbd.c:325 [inline] __nbd_ioctl drivers/block/nbd.c:1342 [inline] nbd_ioctl+0x998/0xa10 drivers/block/nbd.c:1395 __blkdev_driver_ioctl block/ioctl.c:311 [inline] [...] Although it is not a big deal, still silence the UBSAN by limit the input value. Reported-by: Hulk Robot <hulkci(a)huawei.com> Signed-off-by: Baokun Li <libaokun1(a)huawei.com> Reviewed-by: Josef Bacik <josef(a)toxicpanda.com> Link: https://lore.kernel.org/r/20210804021212.990223-1-libaokun1@huawei.com [axboe: dropped unlikely()] Signed-off-by: Jens Axboe <axboe(a)kernel.dk> --- drivers/block/nbd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index c38317979f74..f82264835794 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1384,6 +1384,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, unsigned int cmd, unsigned long arg) { struct nbd_config *config = nbd->config; + loff_t bytesize; switch (cmd) { case NBD_DISCONNECT: @@ -1398,8 +1399,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, case NBD_SET_SIZE: return nbd_set_size(nbd, arg, config->blksize); case NBD_SET_SIZE_BLOCKS: - return nbd_set_size(nbd, arg * config->blksize, - config->blksize); + if (check_mul_overflow((loff_t)arg, config->blksize, &bytesize)) + return -EINVAL; + return nbd_set_size(nbd, bytesize, config->blksize); case NBD_SET_TIMEOUT: nbd_set_cmd_timeout(nbd, arg); return 0; </cut>

3 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tx1/llvm-release-aarch64-spec2k6-O3 - Build # 7 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tx1/llvm-release-aarch64-spec2k6-O3. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tx1/llvm-release-aarch64-spec2k6-O3 Culprit: <cut> commit 6998f8ae2d14e096aff33968f226587b5c1a193a Author: David Sherwood <david.sherwood(a)arm.com> Date: Wed Mar 10 08:34:19 2021 +0000 [LoopVectorize] Simplify scalar cost calculation in getInstructionCost This patch simplifies the calculation of certain costs in getInstructionCost when isScalarAfterVectorization() returns a true value. There are a few places where we multiply a cost by a number N, i.e. unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1; return N * TTI.getArithmeticInstrCost(... After some investigation it seems that there are only these cases that occur in practice: 1. VF is a scalar, in which case N = 1. 2. VF is a vector. We can only get here if: a) the instruction is a GEP/bitcast/PHI with scalar uses, or b) this is an update to an induction variable that remains scalar. I have changed the code so that N is assumed to always be 1. For GEPs the cost is always 0, since this is calculated later on as part of the load/store cost. PHI nodes are costed separately and were never previously multiplied by VF. For all other cases I have added an assert that none of the users needs scalarising, which didn't fire in any unit tests. Only one test required fixing and I believe the original cost for the scalar add instruction to have been wrong, since only one copy remains after vectorisation. I have also added a new test for the case when a pointer PHI feeds directly into a store that will be scalarised as we were previously never testing it. Differential Revision: https://reviews.llvm.org/D99718 </cut> Results regressed to (for first_bad == 6998f8ae2d14e096aff33968f226587b5c1a193a) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3 artifacts/build-6998f8ae2d14e096aff33968f226587b5c1a193a/results_id: 1 # 462.libquantum,libquantum_base.default regressed by 114 # 462.libquantum,[.] quantum_toffoli regressed by 123 # 462.libquantum,[.] quantum_cnot regressed by 115 from (for last_good == c835630c25a4f9925517949579f66a43b113fbc9) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3 artifacts/build-c835630c25a4f9925517949579f66a43b113fbc9/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Results ID of last_good: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-release-aarch64-spec2k6-O3/3744 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Results ID of first_bad: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-release-aarch64-spec2k6-O3/3755 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-6998f8ae2d14e096aff33968f226587b5c1a193a cd investigate-llvm-6998f8ae2d14e096aff33968f226587b5c1a193a git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach 6998f8ae2d14e096aff33968f226587b5c1a193a ../artifacts/test.sh # Reproduce last_good build git checkout --detach c835630c25a4f9925517949579f66a43b113fbc9 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Full commit (up to 1000 lines): <cut> commit 6998f8ae2d14e096aff33968f226587b5c1a193a Author: David Sherwood <david.sherwood(a)arm.com> Date: Wed Mar 10 08:34:19 2021 +0000 [LoopVectorize] Simplify scalar cost calculation in getInstructionCost This patch simplifies the calculation of certain costs in getInstructionCost when isScalarAfterVectorization() returns a true value. There are a few places where we multiply a cost by a number N, i.e. unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1; return N * TTI.getArithmeticInstrCost(... After some investigation it seems that there are only these cases that occur in practice: 1. VF is a scalar, in which case N = 1. 2. VF is a vector. We can only get here if: a) the instruction is a GEP/bitcast/PHI with scalar uses, or b) this is an update to an induction variable that remains scalar. I have changed the code so that N is assumed to always be 1. For GEPs the cost is always 0, since this is calculated later on as part of the load/store cost. PHI nodes are costed separately and were never previously multiplied by VF. For all other cases I have added an assert that none of the users needs scalarising, which didn't fire in any unit tests. Only one test required fixing and I believe the original cost for the scalar add instruction to have been wrong, since only one copy remains after vectorisation. I have also added a new test for the case when a pointer PHI feeds directly into a store that will be scalarised as we were previously never testing it. Differential Revision: https://reviews.llvm.org/D99718 --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 73 +++++++++++++--------- .../AArch64/no_vector_instructions.ll | 2 +- .../LoopVectorize/AArch64/predication_costs.ll | 35 +++++++++++ .../Transforms/LoopVectorize/scalarized-bitcast.ll | 40 ++++++++++++ 4 files changed, 121 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 2b413fc49505..f25af23c86c2 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7383,10 +7383,39 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, Type *RetTy = I->getType(); if (canTruncateToMinimalBitwidth(I, VF)) RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]); - VectorTy = isScalarAfterVectorization(I, VF) ? RetTy : ToVectorTy(RetTy, VF); auto SE = PSE.getSE(); TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + auto hasSingleCopyAfterVectorization = [this](Instruction *I, + ElementCount VF) -> bool { + if (VF.isScalar()) + return true; + + auto Scalarized = InstsToScalarize.find(VF); + assert(Scalarized != InstsToScalarize.end() && + "VF not yet analyzed for scalarization profitability"); + return !Scalarized->second.count(I) && + llvm::all_of(I->users(), [&](User *U) { + auto *UI = cast<Instruction>(U); + return !Scalarized->second.count(UI); + }); + }; + + if (isScalarAfterVectorization(I, VF)) { + // With the exception of GEPs and PHIs, after scalarization there should + // only be one copy of the instruction generated in the loop. This is + // because the VF is either 1, or any instructions that need scalarizing + // have already been dealt with by the the time we get here. As a result, + // it means we don't have to multiply the instruction cost by VF. + assert(I->getOpcode() == Instruction::GetElementPtr || + I->getOpcode() == Instruction::PHI || + (I->getOpcode() == Instruction::BitCast && + I->getType()->isPointerTy()) || + hasSingleCopyAfterVectorization(I, VF)); + VectorTy = RetTy; + } else + VectorTy = ToVectorTy(RetTy, VF); + // TODO: We need to estimate the cost of intrinsic calls. switch (I->getOpcode()) { case Instruction::GetElementPtr: @@ -7514,21 +7543,16 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, Op2VK = TargetTransformInfo::OK_UniformValue; SmallVector<const Value *, 4> Operands(I->operand_values()); - unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1; - return N * TTI.getArithmeticInstrCost( - I->getOpcode(), VectorTy, CostKind, - TargetTransformInfo::OK_AnyValue, - Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I); + return TTI.getArithmeticInstrCost( + I->getOpcode(), VectorTy, CostKind, TargetTransformInfo::OK_AnyValue, + Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I); } case Instruction::FNeg: { assert(!VF.isScalable() && "VF is assumed to be non scalable."); - unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1; - return N * TTI.getArithmeticInstrCost( - I->getOpcode(), VectorTy, CostKind, - TargetTransformInfo::OK_AnyValue, - TargetTransformInfo::OK_AnyValue, - TargetTransformInfo::OP_None, TargetTransformInfo::OP_None, - I->getOperand(0), I); + return TTI.getArithmeticInstrCost( + I->getOpcode(), VectorTy, CostKind, TargetTransformInfo::OK_AnyValue, + TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None, + TargetTransformInfo::OP_None, I->getOperand(0), I); } case Instruction::Select: { SelectInst *SI = cast<SelectInst>(I); @@ -7583,6 +7607,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, VectorTy = ToVectorTy(getMemInstValueType(I), Width); return getMemoryInstructionCost(I, VF); } + case Instruction::BitCast: + if (I->getType()->isPointerTy()) + return 0; + LLVM_FALLTHROUGH; case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: @@ -7593,8 +7621,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, case Instruction::SIToFP: case Instruction::UIToFP: case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: { + case Instruction::FPTrunc: { // Computes the CastContextHint from a Load/Store instruction. auto ComputeCCH = [&](Instruction *I) -> TTI::CastContextHint { assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && @@ -7672,14 +7699,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, } } - unsigned N; - if (isScalarAfterVectorization(I, VF)) { - assert(!VF.isScalable() && "VF is assumed to be non scalable"); - N = VF.getKnownMinValue(); - } else - N = 1; - return N * - TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I); + return TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I); } case Instruction::Call: { bool NeedToScalarize; @@ -7694,11 +7714,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, case Instruction::ExtractValue: return TTI.getInstructionCost(I, TTI::TCK_RecipThroughput); default: - // The cost of executing VF copies of the scalar instruction. This opcode - // is unknown. Assume that it is the same as 'mul'. - return VF.getKnownMinValue() * TTI.getArithmeticInstrCost( - Instruction::Mul, VectorTy, CostKind) + - getScalarizationOverhead(I, VF); + // This opcode is unknown. Assume that it is the same as 'mul'. + return TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind); } // end of switch. } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll index 247ea35ff5d0..3061998518ad 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll @@ -6,7 +6,7 @@ target triple = "aarch64--linux-gnu" ; CHECK-LABEL: all_scalar ; CHECK: LV: Found scalar instruction: %i.next = add nuw nsw i64 %i, 2 -; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %i.next = add nuw nsw i64 %i, 2 +; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %i.next = add nuw nsw i64 %i, 2 ; CHECK: LV: Not considering vector loop of width 2 because it will not generate any vector instructions ; define void @all_scalar(i64* %a, i64 %n) { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll index b0ebb4edf2ad..858b28ddd321 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll @@ -86,6 +86,41 @@ for.end: ret void } +; CHECK-LABEL: predicated_store_phi +; +; Same as predicate_store except we use a pointer PHI to maintain the address +; +; CHECK: Found new scalar instruction: %addr = phi i32* [ %a, %entry ], [ %addr.next, %for.inc ] +; CHECK: Found new scalar instruction: %addr.next = getelementptr inbounds i32, i32* %addr, i64 1 +; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %addr, align 4 +; CHECK: Found an estimated cost of 0 for VF 2 For instruction: %addr = phi i32* [ %a, %entry ], [ %addr.next, %for.inc ] +; CHECK: Found an estimated cost of 3 for VF 2 For instruction: store i32 %tmp2, i32* %addr, align 4 +; +define void @predicated_store_phi(i32* %a, i1 %c, i32 %x, i64 %n) { +entry: + br label %for.body + +for.body: + %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] + %addr = phi i32 * [ %a, %entry ], [ %addr.next, %for.inc ] + %tmp1 = load i32, i32* %addr, align 4 + %tmp2 = add nsw i32 %tmp1, %x + br i1 %c, label %if.then, label %for.inc + +if.then: + store i32 %tmp2, i32* %addr, align 4 + br label %for.inc + +for.inc: + %i.next = add nuw nsw i64 %i, 1 + %cond = icmp slt i64 %i.next, %n + %addr.next = getelementptr inbounds i32, i32* %addr, i64 1 + br i1 %cond, label %for.body, label %for.end + +for.end: + ret void +} + ; CHECK-LABEL: predicated_udiv_scalarized_operand ; ; This test checks that we correctly compute the cost of the predicated udiv diff --git a/llvm/test/Transforms/LoopVectorize/scalarized-bitcast.ll b/llvm/test/Transforms/LoopVectorize/scalarized-bitcast.ll new file mode 100644 index 000000000000..0c97e6ac475e --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/scalarized-bitcast.ll @@ -0,0 +1,40 @@ +; REQUIRES: asserts +; RUN: opt -loop-vectorize -force-vector-width=2 -debug-only=loop-vectorize -S -o - < %s 2>&1 | FileCheck %s + +%struct.foo = type { i32, i64 } + +; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %0 = bitcast i64* %b to i32* + +; The bitcast below will be scalarized due to the predication in the loop. Bitcasts +; between pointer types should be treated as free, despite the scalarization. +define void @foo(%struct.foo* noalias nocapture %in, i32* noalias nocapture readnone %out, i64 %n) { +entry: + br label %for.body + +for.body: ; preds = %entry, %if.end + %i.012 = phi i64 [ %inc, %if.end ], [ 0, %entry ] + %b = getelementptr inbounds %struct.foo, %struct.foo* %in, i64 %i.012, i32 1 + %0 = bitcast i64* %b to i32* + %a = getelementptr inbounds %struct.foo, %struct.foo* %in, i64 %i.012, i32 0 + %1 = load i32, i32* %a, align 8 + %tobool.not = icmp eq i32 %1, 0 + br i1 %tobool.not, label %if.end, label %land.lhs.true + +land.lhs.true: ; preds = %for.body + %2 = load i32, i32* %0, align 4 + %cmp2 = icmp sgt i32 %2, 0 + br i1 %cmp2, label %if.then, label %if.end + +if.then: ; preds = %land.lhs.true + %sub = add nsw i32 %2, -1 + store i32 %sub, i32* %0, align 4 + br label %if.end + +if.end: ; preds = %if.then, %land.lhs.true, %for.body + %inc = add nuw nsw i64 %i.012, 1 + %exitcond.not = icmp eq i64 %inc, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %if.end + ret void +} </cut>

3 years, 10 months

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

linaro-toolchain