- linaro-toolchain - lists.linaro.org

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tx1/llvm-master-aarch64-spec2k6-O2 - Build # 15 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O2. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O2 Culprit: <cut> commit a0a9c9e188f5b97ff8b74287d1536f57ec5dda54 Author: Sanjay Patel <spatel(a)rotateright.com> Date: Wed Aug 11 12:41:47 2021 -0400 [InstCombine] avoid breaking up min/max (cmp+sel) idioms This is a quick fix for a motivating case that looks like this: https://godbolt.org/z/GeMqzMc38 As noted, we might be able to restore the min/max patterns with select folds, or we just wait for this to become easier with canonicalization to min/max intrinsics. </cut> Results regressed to (for first_bad == a0a9c9e188f5b97ff8b74287d1536f57ec5dda54) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2 artifacts/build-a0a9c9e188f5b97ff8b74287d1536f57ec5dda54/results_id: 1 # 464.h264ref,h264ref_base.default regressed by 106 # 464.h264ref,[.] FastFullPelBlockMotionSearch regressed by 146 from (for last_good == 5bf4ab0e79e1a8552019918a662bdf7af8b3825a) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2 artifacts/build-5bf4ab0e79e1a8552019918a662bdf7af8b3825a/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Results ID of last_good: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O2/3875 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Results ID of first_bad: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O2/3877 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-a0a9c9e188f5b97ff8b74287d1536f57ec5dda54 cd investigate-llvm-a0a9c9e188f5b97ff8b74287d1536f57ec5dda54 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach a0a9c9e188f5b97ff8b74287d1536f57ec5dda54 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 5bf4ab0e79e1a8552019918a662bdf7af8b3825a ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Full commit (up to 1000 lines): <cut> commit a0a9c9e188f5b97ff8b74287d1536f57ec5dda54 Author: Sanjay Patel <spatel(a)rotateright.com> Date: Wed Aug 11 12:41:47 2021 -0400 [InstCombine] avoid breaking up min/max (cmp+sel) idioms This is a quick fix for a motivating case that looks like this: https://godbolt.org/z/GeMqzMc38 As noted, we might be able to restore the min/max patterns with select folds, or we just wait for this to become easier with canonicalization to min/max intrinsics. --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 12 +++++++++--- llvm/test/Transforms/InstCombine/icmp-add.ll | 13 ++++++------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 2e20bca300d3..71037616585c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -5755,9 +5755,6 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { if (Instruction *Res = foldICmpWithDominatingICmp(I)) return Res; - if (Instruction *Res = foldICmpBinOp(I, Q)) - return Res; - if (Instruction *Res = foldICmpUsingKnownBits(I)) return Res; @@ -5803,6 +5800,15 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { } } + // The folds in here may rely on wrapping flags and special constants, so + // they can break up min/max idioms in some cases but not seemingly similar + // patterns. + // FIXME: It may be possible to enhance select folding to make this + // unnecessary. It may also be moot if we canonicalize to min/max + // intrinsics. + if (Instruction *Res = foldICmpBinOp(I, Q)) + return Res; + if (Instruction *Res = foldICmpInstWithConstant(I)) return Res; diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll b/llvm/test/Transforms/InstCombine/icmp-add.ll index 187e0ad1a31b..1750b5685c50 100644 --- a/llvm/test/Transforms/InstCombine/icmp-add.ll +++ b/llvm/test/Transforms/InstCombine/icmp-add.ll @@ -972,7 +972,6 @@ define i1 @slt_offset_nsw(i8 %a, i8 %c) { ret i1 %ov } -; FIXME: ; In the following 4 tests, we could push the inc/dec ; through the min/max, but we should not break up the ; min/max idiom by using different icmp and select @@ -980,9 +979,9 @@ define i1 @slt_offset_nsw(i8 %a, i8 %c) { define i32 @increment_max(i32 %x) { ; CHECK-LABEL: @increment_max( -; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X:%.*]], 1 -; CHECK-NEXT: [[C_INV:%.*]] = icmp slt i32 [[X]], 0 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C_INV]], i32 0, i32 [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -1 +; CHECK-NEXT: [[S:%.*]] = add nsw i32 [[TMP2]], 1 ; CHECK-NEXT: ret i32 [[S]] ; %a = add nsw i32 %x, 1 @@ -1019,9 +1018,9 @@ define i32 @increment_min(i32 %x) { define i32 @decrement_min(i32 %x) { ; CHECK-LABEL: @decrement_min( -; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X:%.*]], -1 -; CHECK-NEXT: [[C_INV:%.*]] = icmp sgt i32 [[X]], 0 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C_INV]], i32 0, i32 [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 1 +; CHECK-NEXT: [[S:%.*]] = add nsw i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[S]] ; %a = add nsw i32 %x, -1 </cut>

4 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_apm/llvm-master-aarch64-spec2k6-Os - Build # 4 - Fixed!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_apm/llvm-master-aarch64-spec2k6-Os. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_apm/llvm-master-aarch64-spec2k6-Os Culprit: <cut> commit dae7adda949993bd96aa50c551dc64ddebba7923 Author: Matt Jacobson <mhjacobson(a)me.com> Date: Fri Aug 6 10:12:00 2021 +0800 [AVR][clang] Pass '-fno-use-init-array' to cc1 as default On AVR, '.ctors' is used, not '.init_array'. Make this the default unless specifically overridden by driver argument. This matches gcc, and it matches the behavior in (e.g.) the NetBSD driver (for certain OS variants). Reviewed by: MaskRay Differential Revision: https://reviews.llvm.org/D107610 </cut> Results regressed to (for first_bad == dae7adda949993bd96aa50c551dc64ddebba7923) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -Os artifacts/build-dae7adda949993bd96aa50c551dc64ddebba7923/results_id: 1 # 400.perlbench,perlbench_base.default regressed by 94393 # 453.povray,povray_base.default regressed by 102 # 470.lbm,lbm_base.default regressed by 103 # 470.lbm,[.] LBM_performStreamCollide regressed by 118 from (for last_good == 66b1e629d89543cb7542c184f7dfb32deee732e1) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -Os artifacts/build-66b1e629d89543cb7542c184f7dfb32deee732e1/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Results ID of last_good: apm_64/tcwg_bmk_llvm_apm/bisect-llvm-master-aarch64-spec2k6-Os/3855 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Results ID of first_bad: apm_64/tcwg_bmk_llvm_apm/bisect-llvm-master-aarch64-spec2k6-Os/3852 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-dae7adda949993bd96aa50c551dc64ddebba7923 cd investigate-llvm-dae7adda949993bd96aa50c551dc64ddebba7923 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach dae7adda949993bd96aa50c551dc64ddebba7923 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 66b1e629d89543cb7542c184f7dfb32deee732e1 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Full commit (up to 1000 lines): <cut> commit dae7adda949993bd96aa50c551dc64ddebba7923 Author: Matt Jacobson <mhjacobson(a)me.com> Date: Fri Aug 6 10:12:00 2021 +0800 [AVR][clang] Pass '-fno-use-init-array' to cc1 as default On AVR, '.ctors' is used, not '.init_array'. Make this the default unless specifically overridden by driver argument. This matches gcc, and it matches the behavior in (e.g.) the NetBSD driver (for certain OS variants). Reviewed by: MaskRay Differential Revision: https://reviews.llvm.org/D107610 --- clang/lib/Driver/ToolChains/AVR.cpp | 10 ++++++++++ clang/lib/Driver/ToolChains/AVR.h | 7 ++++++- clang/test/Driver/avr-toolchain.c | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/ToolChains/AVR.cpp b/clang/lib/Driver/ToolChains/AVR.cpp index 5b097f9b2ed9..18c6f41e22b1 100644 --- a/clang/lib/Driver/ToolChains/AVR.cpp +++ b/clang/lib/Driver/ToolChains/AVR.cpp @@ -370,6 +370,16 @@ void AVRToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, addSystemInclude(DriverArgs, CC1Args, AVRInc); } +void AVRToolChain::addClangTargetOptions( + const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, + Action::OffloadKind DeviceOffloadKind) const { + // By default, use `.ctors` (not `.init_array`), as required by libgcc, which + // runs constructors/destructors on AVR. + if (!DriverArgs.hasFlag(options::OPT_fuse_init_array, + options::OPT_fno_use_init_array, false)) + CC1Args.push_back("-fno-use-init-array"); +} + Tool *AVRToolChain::buildLinker() const { return new tools::AVR::Linker(getTriple(), *this, LinkStdlib); } diff --git a/clang/lib/Driver/ToolChains/AVR.h b/clang/lib/Driver/ToolChains/AVR.h index f612aa691182..2d027957ed76 100644 --- a/clang/lib/Driver/ToolChains/AVR.h +++ b/clang/lib/Driver/ToolChains/AVR.h @@ -11,8 +11,8 @@ #include "Gnu.h" #include "clang/Driver/InputInfo.h" -#include "clang/Driver/ToolChain.h" #include "clang/Driver/Tool.h" +#include "clang/Driver/ToolChain.h" namespace clang { namespace driver { @@ -26,6 +26,11 @@ public: AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; + void + addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args, + Action::OffloadKind DeviceOffloadKind) const override; + protected: Tool *buildLinker() const override; diff --git a/clang/test/Driver/avr-toolchain.c b/clang/test/Driver/avr-toolchain.c index 692063dc2c34..877f650a3d02 100644 --- a/clang/test/Driver/avr-toolchain.c +++ b/clang/test/Driver/avr-toolchain.c @@ -1,7 +1,7 @@ // A basic clang -cc1 command-line. // RUN: %clang %s -### -no-canonical-prefixes -target avr 2>&1 | FileCheck -check-prefix=CC1 %s -// CC1: clang{{.*}} "-cc1" "-triple" "avr" +// CC1: clang{{.*}} "-cc1" "-triple" "avr" {{.*}} "-fno-use-init-array" // RUN: %clang %s -### -no-canonical-prefixes -target avr --sysroot %S/Inputs/basic_avr_tree 2>&1 | FileCheck -check-prefix CC1A %s // CC1A: clang{{.*}} "-cc1" "-triple" "avr" {{.*}} "-internal-isystem" {{".*avr/include"}} </cut>

4 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tk1/llvm-release-arm-spec2k6-O3 - Build # 9 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O3. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O3 Culprit: <cut> commit cd6de0e8de4a5fd558580be4b1a07116914fc8ed Author: Sjoerd Meijer <sjoerd.meijer(a)arm.com> Date: Fri Feb 12 15:15:05 2021 +0000 [TTI] Unify FavorPostInc and FavorBackedgeIndex into getPreferredAddressingMode This refactors shouldFavorPostInc() and shouldFavorBackedgeIndex() into getPreferredAddressingMode() so that we have one interface to steer LSR in generating the preferred addressing mode. Differential Revision: https://reviews.llvm.org/D96600 </cut> Results regressed to (for first_bad == cd6de0e8de4a5fd558580be4b1a07116914fc8ed) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_marm artifacts/build-cd6de0e8de4a5fd558580be4b1a07116914fc8ed/results_id: 1 # 482.sphinx3,sphinx_livepretend_base.default regressed by 103 # 482.sphinx3,[.] vector_gautbl_eval_logs3 regressed by 111 from (for last_good == 4bd5bd40094c7b8b691cf394d813efc48d82acfd) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_marm artifacts/build-4bd5bd40094c7b8b691cf394d813efc48d82acfd/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of last_good: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O3/3835 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of first_bad: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O3/3840 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-cd6de0e8de4a5fd558580be4b1a07116914fc8ed cd investigate-llvm-cd6de0e8de4a5fd558580be4b1a07116914fc8ed git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach cd6de0e8de4a5fd558580be4b1a07116914fc8ed ../artifacts/test.sh # Reproduce last_good build git checkout --detach 4bd5bd40094c7b8b691cf394d813efc48d82acfd ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Full commit (up to 1000 lines): <cut> commit cd6de0e8de4a5fd558580be4b1a07116914fc8ed Author: Sjoerd Meijer <sjoerd.meijer(a)arm.com> Date: Fri Feb 12 15:15:05 2021 +0000 [TTI] Unify FavorPostInc and FavorBackedgeIndex into getPreferredAddressingMode This refactors shouldFavorPostInc() and shouldFavorBackedgeIndex() into getPreferredAddressingMode() so that we have one interface to steer LSR in generating the preferred addressing mode. Differential Revision: https://reviews.llvm.org/D96600 --- llvm/include/llvm/Analysis/TargetTransformInfo.h | 25 ++++++++++++---------- .../llvm/Analysis/TargetTransformInfoImpl.h | 7 +++--- llvm/lib/Analysis/TargetTransformInfo.cpp | 10 ++++----- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 22 ++++++++++--------- llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 4 ++-- .../Target/Hexagon/HexagonTargetTransformInfo.cpp | 5 +++-- .../Target/Hexagon/HexagonTargetTransformInfo.h | 3 ++- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 24 ++++++++++++--------- 8 files changed, 55 insertions(+), 45 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index c3d7d2cc80a4..79303dab92a2 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -638,13 +638,15 @@ public: DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const; - /// \return True is LSR should make efforts to create/preserve post-inc - /// addressing mode expressions. - bool shouldFavorPostInc() const; + enum AddressingModeKind { + AMK_PreIndexed, + AMK_PostIndexed, + AMK_None + }; - /// Return true if LSR should make efforts to generate indexed addressing - /// modes that operate across loop iterations. - bool shouldFavorBackedgeIndex(const Loop *L) const; + /// Return the preferred addressing mode LSR should make efforts to generate. + AddressingModeKind getPreferredAddressingMode(const Loop *L, + ScalarEvolution *SE) const; /// Return true if the target supports masked store. bool isLegalMaskedStore(Type *DataType, Align Alignment) const; @@ -1454,8 +1456,8 @@ public: virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) = 0; - virtual bool shouldFavorPostInc() const = 0; - virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0; + virtual AddressingModeKind + getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0; virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0; virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0; virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0; @@ -1796,9 +1798,10 @@ public: TargetLibraryInfo *LibInfo) override { return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo); } - bool shouldFavorPostInc() const override { return Impl.shouldFavorPostInc(); } - bool shouldFavorBackedgeIndex(const Loop *L) const override { - return Impl.shouldFavorBackedgeIndex(L); + AddressingModeKind + getPreferredAddressingMode(const Loop *L, + ScalarEvolution *SE) const override { + return Impl.getPreferredAddressingMode(L, SE); } bool isLegalMaskedStore(Type *DataType, Align Alignment) override { return Impl.isLegalMaskedStore(DataType, Alignment); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 84de5038df42..a9c9d3cb9f4f 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -209,9 +209,10 @@ public: return false; } - bool shouldFavorPostInc() const { return false; } - - bool shouldFavorBackedgeIndex(const Loop *L) const { return false; } + TTI::AddressingModeKind + getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const { + return TTI::AMK_None; + } bool isLegalMaskedStore(Type *DataType, Align Alignment) const { return false; diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 16992d099e0a..3db4b0b0d553 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -409,12 +409,10 @@ bool TargetTransformInfo::canSaveCmp(Loop *L, BranchInst **BI, return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo); } -bool TargetTransformInfo::shouldFavorPostInc() const { - return TTIImpl->shouldFavorPostInc(); -} - -bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const { - return TTIImpl->shouldFavorBackedgeIndex(L); +TTI::AddressingModeKind +TargetTransformInfo::getPreferredAddressingMode(const Loop *L, + ScalarEvolution *SE) const { + return TTIImpl->getPreferredAddressingMode(L, SE); } bool TargetTransformInfo::isLegalMaskedStore(Type *DataType, diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 80f1f2a2a8f7..8c2a79efc674 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -100,18 +100,20 @@ bool ARMTTIImpl::areInlineCompatible(const Function *Caller, return MatchExact && MatchSubset; } -bool ARMTTIImpl::shouldFavorBackedgeIndex(const Loop *L) const { - if (L->getHeader()->getParent()->hasOptSize()) - return false; +TTI::AddressingModeKind +ARMTTIImpl::getPreferredAddressingMode(const Loop *L, + ScalarEvolution *SE) const { if (ST->hasMVEIntegerOps()) - return false; - return ST->isMClass() && ST->isThumb2() && L->getNumBlocks() == 1; -} + return TTI::AMK_PostIndexed; -bool ARMTTIImpl::shouldFavorPostInc() const { - if (ST->hasMVEIntegerOps()) - return true; - return false; + if (L->getHeader()->getParent()->hasOptSize()) + return TTI::AMK_None; + + if (ST->isMClass() && ST->isThumb2() && + L->getNumBlocks() == 1) + return TTI::AMK_PreIndexed; + + return TTI::AMK_None; } Optional<Instruction *> diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index b8de27101a61..808128929000 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -103,8 +103,8 @@ public: bool enableInterleavedAccessVectorization() { return true; } - bool shouldFavorBackedgeIndex(const Loop *L) const; - bool shouldFavorPostInc() const; + TTI::AddressingModeKind + getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const; /// Floating-point computation using ARMv8 AArch32 Advanced /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index af7bc4682249..89e7df0aa27e 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -80,8 +80,9 @@ void HexagonTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, } } -bool HexagonTTIImpl::shouldFavorPostInc() const { - return true; +AddressingModeKind::getPreferredAddressingMode(const Loop *L, + ScalarEvolution *SE) const { + return AMK_PostIndexed; } /// --- Vector TTI begin --- diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h index dc075d6147b6..ebaa619837f0 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -67,7 +67,8 @@ public: TTI::PeelingPreferences &PP); /// Bias LSR towards creating post-increment opportunities. - bool shouldFavorPostInc() const; + AddressingModeKind getPreferredAddressingMode(const Loop *L, + ScalarEvolution *SE) const; // L1 cache prefetch. unsigned getPrefetchDistance() const override; diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 5dec9b542076..2f90df70a3c3 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1227,13 +1227,15 @@ static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) { /// Tally up interesting quantities from the given register. void Cost::RateRegister(const Formula &F, const SCEV *Reg, SmallPtrSetImpl<const SCEV *> &Regs) { + TTI::AddressingModeKind AMK = TTI->getPreferredAddressingMode(L, SE); + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) { // If this is an addrec for another loop, it should be an invariant // with respect to L since L is the innermost loop (at least // for now LSR only handles innermost loops). if (AR->getLoop() != L) { // If the AddRec exists, consider it's register free and leave it alone. - if (isExistingPhi(AR, *SE) && !TTI->shouldFavorPostInc()) + if (isExistingPhi(AR, *SE) && AMK != TTI::AMK_PostIndexed) return; // It is bad to allow LSR for current loop to add induction variables @@ -1254,13 +1256,11 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg, // If the step size matches the base offset, we could use pre-indexed // addressing. - if (TTI->shouldFavorBackedgeIndex(L)) { + if (AMK == TTI::AMK_PreIndexed) { if (auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE))) if (Step->getAPInt() == F.BaseOffset) LoopCost = 0; - } - - if (TTI->shouldFavorPostInc()) { + } else if (AMK == TTI::AMK_PostIndexed) { const SCEV *LoopStep = AR->getStepRecurrence(*SE); if (isa<SCEVConstant>(LoopStep)) { const SCEV *LoopStart = AR->getStart(); @@ -3575,7 +3575,8 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, // may generate a post-increment operator. The reason is that the // reassociations cause extra base+register formula to be created, // and possibly chosen, but the post-increment is more efficient. - if (TTI.shouldFavorPostInc() && mayUsePostIncMode(TTI, LU, BaseReg, L, SE)) + TTI::AddressingModeKind AMK = TTI.getPreferredAddressingMode(L, &SE); + if (AMK == TTI::AMK_PostIndexed && mayUsePostIncMode(TTI, LU, BaseReg, L, SE)) return; SmallVector<const SCEV *, 8> AddOps; const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE); @@ -4239,7 +4240,8 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm; if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, NewF)) { - if (TTI.shouldFavorPostInc() && + if (TTI.getPreferredAddressingMode(this->L, &SE) == + TTI::AMK_PostIndexed && mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE)) continue; if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm)) @@ -4679,7 +4681,7 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() { /// If we are over the complexity limit, filter out any post-inc prefering /// variables to only post-inc values. void LSRInstance::NarrowSearchSpaceByFilterPostInc() { - if (!TTI.shouldFavorPostInc()) + if (TTI.getPreferredAddressingMode(L, &SE) != TTI::AMK_PostIndexed) return; if (EstimateSearchSpaceComplexity() < ComplexityLimit) return; @@ -4978,7 +4980,8 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution, // This can sometimes (notably when trying to favour postinc) lead to // sub-optimial decisions. There it is best left to the cost modelling to // get correct. - if (!TTI.shouldFavorPostInc() || LU.Kind != LSRUse::Address) { + if (TTI.getPreferredAddressingMode(L, &SE) != TTI::AMK_PostIndexed || + LU.Kind != LSRUse::Address) { int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size()); for (const SCEV *Reg : ReqRegs) { if ((F.ScaledReg && F.ScaledReg == Reg) || @@ -5560,7 +5563,8 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU) : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L), MSSAU(MSSAU), FavorBackedgeIndex(EnableBackedgeIndexing && - TTI.shouldFavorBackedgeIndex(L)) { + TTI.getPreferredAddressingMode(L, &SE) == + TTI::AMK_PreIndexed) { // If LoopSimplify form is not available, stay out of trouble. if (!L->isLoopSimplifyForm()) return; </cut>

4 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_kernel/llvm-master-aarch64-lts-allmodconfig - Build # 6 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *linux* in CI configuration tcwg_kernel/llvm-master-aarch64-lts-allmodconfig. So far, this commit has regressed CI configurations: - tcwg_kernel/llvm-master-aarch64-lts-allmodconfig Culprit: <cut> commit 132a8267adabd645476b542b3b132c1b91988fe8 Author: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> Date: Thu Aug 12 13:22:21 2021 +0200 Linux 5.10.58 Link: https://lore.kernel.org/r/20210810172955.660225700@linuxfoundation.org Tested-by: Fox Chen <foxhlchen(a)gmail.com> Tested-by: Hulk Robot <hulkrobot(a)huawei.com> Tested-by: Sudip Mukherjee <sudip.mukherjee(a)codethink.co.uk> Tested-by: Linux Kernel Functional Testing <lkft(a)linaro.org> Tested-by: Guenter Roeck <linux(a)roeck-us.net> Tested-by: Shuah Khan <skhan(a)linuxfoundation.org> Tested-by: Aakash Hemadri <aakashhemadri123(a)gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> </cut> Results regressed to (for first_bad == 132a8267adabd645476b542b3b132c1b91988fe8) # reset_artifacts: -10 # build_abe binutils: -9 # build_llvm: -5 # build_abe qemu: -2 # linux_n_obj: 28702 # linux build successful: all # First few build errors in logs: from (for last_good == 3d7d1b0f5f41d66a2d177f9fdcdb32e23a4b2513) # reset_artifacts: -10 # build_abe binutils: -9 # build_llvm: -5 # build_abe qemu: -2 # linux_n_obj: 28702 # linux build successful: all # linux boot successful: boot Artifacts of last_good build: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-aarch64-lts-a… Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-aarch64-lts-a… Build top page/logs: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-aarch64-lts-a… Configuration details: Reproduce builds: <cut> mkdir investigate-linux-132a8267adabd645476b542b3b132c1b91988fe8 cd investigate-linux-132a8267adabd645476b542b3b132c1b91988fe8 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-aarch64-lts-a… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-aarch64-lts-a… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-aarch64-lts-a… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_kernel-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /linux/ ./ ./bisect/baseline/ cd linux # Reproduce first_bad build git checkout --detach 132a8267adabd645476b542b3b132c1b91988fe8 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 3d7d1b0f5f41d66a2d177f9fdcdb32e23a4b2513 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-aarch64-lts-a… Build log: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-aarch64-lts-a… Full commit (up to 1000 lines): <cut> commit 132a8267adabd645476b542b3b132c1b91988fe8 Author: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> Date: Thu Aug 12 13:22:21 2021 +0200 Linux 5.10.58 Link: https://lore.kernel.org/r/20210810172955.660225700@linuxfoundation.org Tested-by: Fox Chen <foxhlchen(a)gmail.com> Tested-by: Hulk Robot <hulkrobot(a)huawei.com> Tested-by: Sudip Mukherjee <sudip.mukherjee(a)codethink.co.uk> Tested-by: Linux Kernel Functional Testing <lkft(a)linaro.org> Tested-by: Guenter Roeck <linux(a)roeck-us.net> Tested-by: Shuah Khan <skhan(a)linuxfoundation.org> Tested-by: Aakash Hemadri <aakashhemadri123(a)gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e9621a90e752..232dee1140c1 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 57 +SUBLEVEL = 58 EXTRAVERSION = NAME = Dare mighty things </cut>

4 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tx1/llvm-master-aarch64-spec2k6-O2_LTO - Build # 22 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O2_LTO. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O2_LTO Culprit: <cut> commit 4389a413e2129d7d55ee779638b649aa852b6f8a Author: Zahira Ammarguellat <zahira.ammarguellat(a)intel.com> Date: Fri Aug 6 12:01:47 2021 -0700 Revert "[clang][fpenv][patch] Change clang option -ffp-model=precise to select ffp-contract=on" This reverts commit 48ad446a0fb2c9b98cb7047e4daf8a84c29cef8f. </cut> Results regressed to (for first_bad == 4389a413e2129d7d55ee779638b649aa852b6f8a) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2_LTO artifacts/build-4389a413e2129d7d55ee779638b649aa852b6f8a/results_id: 1 # 444.namd,namd_base.default regressed by 104 # 447.dealII,dealII_base.default regressed by 105 from (for last_good == dfce2909ee1ea1523ec27b834a0e56429e9c2beb) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2_LTO artifacts/build-dfce2909ee1ea1523ec27b834a0e56429e9c2beb/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Results ID of last_good: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O2_LTO/3823 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Results ID of first_bad: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O2_LTO/3819 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-4389a413e2129d7d55ee779638b649aa852b6f8a cd investigate-llvm-4389a413e2129d7d55ee779638b649aa852b6f8a git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach 4389a413e2129d7d55ee779638b649aa852b6f8a ../artifacts/test.sh # Reproduce last_good build git checkout --detach dfce2909ee1ea1523ec27b834a0e56429e9c2beb ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Full commit (up to 1000 lines): <cut> commit 4389a413e2129d7d55ee779638b649aa852b6f8a Author: Zahira Ammarguellat <zahira.ammarguellat(a)intel.com> Date: Fri Aug 6 12:01:47 2021 -0700 Revert "[clang][fpenv][patch] Change clang option -ffp-model=precise to select ffp-contract=on" This reverts commit 48ad446a0fb2c9b98cb7047e4daf8a84c29cef8f. --- clang/docs/UsersManual.rst | 48 ++----------------------- clang/lib/Driver/ToolChains/Clang.cpp | 33 ++++++++--------- clang/test/CodeGen/ffp-contract-option.c | 47 +++--------------------- clang/test/CodeGen/ppc-emmintrin.c | 4 +-- clang/test/CodeGen/ppc-xmmintrin.c | 4 +-- clang/test/Driver/fp-model.c | 61 +++++++++++++++----------------- 6 files changed, 58 insertions(+), 139 deletions(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 838669794ea8..980d0ab45975 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1260,50 +1260,8 @@ installed. Controlling Floating Point Behavior ----------------------------------- -Clang provides a number of ways to control floating point behavior, including -with command line options and source pragmas. This section -describes the various floating point semantic modes and the corresponding options. - -.. csv-table:: Floating Point Semantic Modes - :header: "Mode", "Values" - :widths: 15, 30, 30 - - "except_behavior", "{ignore, strict, may_trap}", "ffp-exception-behavior" - "fenv_access", "{off, on}", "(none)" - "rounding_mode", "{dynamic, tonearest, downward, upward, towardzero}", "frounding-math" - "contract", "{on, off, fast}", "ffp-contract" - "denormal_fp_math", "{IEEE, PreserveSign, PositiveZero}", "fdenormal-fp-math" - "denormal_fp32_math", "{IEEE, PreserveSign, PositiveZero}", "fdenormal-fp-math-fp32" - "support_math_errno", "{on, off}", "fmath-errno" - "no_honor_nans", "{on, off}", "fhonor-nans" - "no_honor_infinities", "{on, off}", "fhonor-infinities" - "no_signed_zeros", "{on, off}", "fsigned-zeros" - "allow_reciprocal", "{on, off}", "freciprocal-math" - "allow_approximate_fns", "{on, off}", "(none)" - "allow_reassociation", "{on, off}", "fassociative-math" - - -This table describes the option settings that correspond to the three -floating point semantic models: precise (the default), strict, and fast. - - -.. csv-table:: Floating Point Models - :header: "Mode", "Precise", "Strict", "Fast" - :widths: 25, 15, 15, 15 - - "except_behavior", "ignore", "strict", "ignore" - "fenv_access", "off", "on", "off" - "rounding_mode", "tonearest", "dynamic", "tonearest" - "contract", "on", "off", "fast" - "denormal_fp_math", "IEEE", "IEEE", "PreserveSign" - "denormal_fp32_math", "IEEE","IEEE", "PreserveSign" - "support_math_errno", "on", "on", "off" - "no_honor_nans", "off", "off", "on" - "no_honor_infinities", "off", "off", "on" - "no_signed_zeros", "off", "off", "on" - "allow_reciprocal", "off", "off", "on" - "allow_approximate_fns", "off", "off", "on" - "allow_reassociation", "off", "off", "on" +Clang provides a number of ways to control floating point behavior. The options +are listed below. .. option:: -ffast-math @@ -1498,7 +1456,7 @@ Note that floating-point operations performed as part of constant initialization and ``fast``. Details: - * ``precise`` Disables optimizations that are not value-safe on floating-point data, although FP contraction (FMA) is enabled (``-ffp-contract=on``). This is the default behavior. + * ``precise`` Disables optimizations that are not value-safe on floating-point data, although FP contraction (FMA) is enabled (``-ffp-contract=fast``). This is the default behavior. * ``strict`` Enables ``-frounding-math`` and ``-ffp-exception-behavior=strict``, and disables contractions (FMA). All of the ``-ffast-math`` enablements are disabled. Enables ``STDC FENV_ACCESS``: by default ``FENV_ACCESS`` is disabled. This option setting behaves as though ``#pragma STDC FENV_ACESS ON`` appeared at the top of the source file. * ``fast`` Behaves identically to specifying both ``-ffast-math`` and ``ffp-contract=fast`` diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 1c79640be80f..96bbc0250126 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2641,7 +2641,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, llvm::DenormalMode DenormalFPMath = DefaultDenormalFPMath; llvm::DenormalMode DenormalFP32Math = DefaultDenormalFP32Math; - StringRef FPContract = "on"; + StringRef FPContract = ""; bool StrictFPModel = false; @@ -2666,7 +2666,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, ReciprocalMath = false; SignedZeros = true; // -fno_fast_math restores default denormal and fpcontract handling - FPContract = "on"; + FPContract = ""; DenormalFPMath = llvm::DenormalMode::getIEEE(); // FIXME: The target may have picked a non-IEEE default mode here based on @@ -2686,18 +2686,20 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, // ffp-model= is a Driver option, it is entirely rewritten into more // granular options before being passed into cc1. // Use the gcc option in the switch below. - if (!FPModel.empty() && !FPModel.equals(Val)) + if (!FPModel.empty() && !FPModel.equals(Val)) { D.Diag(clang::diag::warn_drv_overriding_flag_option) << Args.MakeArgString("-ffp-model=" + FPModel) << Args.MakeArgString("-ffp-model=" + Val); + FPContract = ""; + } if (Val.equals("fast")) { optID = options::OPT_ffast_math; FPModel = Val; - FPContract = Val; + FPContract = "fast"; } else if (Val.equals("precise")) { optID = options::OPT_ffp_contract; FPModel = Val; - FPContract = "on"; + FPContract = "fast"; PreciseFPModel = true; } else if (Val.equals("strict")) { StrictFPModel = true; @@ -2783,11 +2785,9 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, case options::OPT_ffp_contract: { StringRef Val = A->getValue(); if (PreciseFPModel) { - // When -ffp-model=precise is seen on the command line, - // the boolean PreciseFPModel is set to true which indicates - // "the current option is actually PreciseFPModel". The optID - // is changed to OPT_ffp_contract and FPContract is set to "on". - // the argument Val string is "precise": it shouldn't be checked. + // -ffp-model=precise enables ffp-contract=fast as a side effect + // the FPContract value has already been set to a string literal + // and the Val string isn't a pertinent value. ; } else if (Val.equals("fast") || Val.equals("on") || Val.equals("off")) FPContract = Val; @@ -2897,17 +2897,18 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, // -fno_fast_math restores default denormal and fpcontract handling DenormalFPMath = DefaultDenormalFPMath; DenormalFP32Math = llvm::DenormalMode::getIEEE(); - FPContract = "on"; + FPContract = ""; break; } if (StrictFPModel) { // If -ffp-model=strict has been specified on command line but // subsequent options conflict then emit warning diagnostic. - if (HonorINFs && HonorNaNs && !AssociativeMath && !ReciprocalMath && - SignedZeros && TrappingMath && RoundingFPMath && - DenormalFPMath == llvm::DenormalMode::getIEEE() && - DenormalFP32Math == llvm::DenormalMode::getIEEE() && - FPContract.equals("off")) + if (HonorINFs && HonorNaNs && + !AssociativeMath && !ReciprocalMath && + SignedZeros && TrappingMath && RoundingFPMath && + (FPContract.equals("off") || FPContract.empty()) && + DenormalFPMath == llvm::DenormalMode::getIEEE() && + DenormalFP32Math == llvm::DenormalMode::getIEEE()) // OK: Current Arg doesn't conflict with -ffp-model=strict ; else { diff --git a/clang/test/CodeGen/ffp-contract-option.c b/clang/test/CodeGen/ffp-contract-option.c index efc72c2b5461..52b750795940 100644 --- a/clang/test/CodeGen/ffp-contract-option.c +++ b/clang/test/CodeGen/ffp-contract-option.c @@ -1,46 +1,9 @@ -// RUN: %clang_cc1 -O3 -ffp-contract=fast -triple=aarch64-apple-darwin -S -o - %s | FileCheck --check-prefix=CHECK-FMADD %s +// RUN: %clang_cc1 -O3 -ffp-contract=fast -triple=aarch64-apple-darwin -S -o - %s | FileCheck %s // REQUIRES: aarch64-registered-target float fma_test1(float a, float b, float c) { -// CHECK-FMADD: fmadd - float x = a * b; - float y = x + c; - return y; -} - -// RUN: %clang_cc1 -triple=x86_64 %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-DEFAULT %s -// -// RUN: %clang_cc1 -triple=x86_64 -ffp-contract=off %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-DEFAULT %s -// RUN: %clang_cc1 -triple=x86_64 -ffp-contract=on %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-ON %s -// RUN: %clang_cc1 -triple=x86_64 -ffp-contract=fast %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-CONTRACTFAST %s -// -// RUN: %clang_cc1 -triple=x86_64 -ffast-math %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-DEFAULTFAST %s -// RUN: %clang_cc1 -triple=x86_64 -ffast-math -ffp-contract=off %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-DEFAULTFAST %s -// RUN: %clang_cc1 -triple=x86_64 -ffast-math -ffp-contract=on %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-ONFAST %s -// RUN: %clang_cc1 -triple=x86_64 -ffast-math -ffp-contract=fast %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-FASTFAST %s -float mymuladd( float x, float y, float z ) { - return x * y + z; - // CHECK-DEFAULT: = fmul float - // CHECK-DEFAULT: = fadd float - - // CHECK-ON: = call float @llvm.fmuladd.f32 - - // CHECK-CONTRACTFAST: = fmul contract float - // CHECK-CONTRACTFAST: = fadd contract float - - // CHECK-DEFAULTFAST: = fmul reassoc nnan ninf nsz arcp afn float - // CHECK-DEFAULTFAST: = fadd reassoc nnan ninf nsz arcp afn float - - // CHECK-ONFAST: = call reassoc nnan ninf nsz arcp afn float @llvm.fmuladd.f32 - - // CHECK-FASTFAST: = fmul fast float - // CHECK-FASTFAST: = fadd fast float +// CHECK: fmadd + float x = a * b; + float y = x + c; + return y; } diff --git a/clang/test/CodeGen/ppc-emmintrin.c b/clang/test/CodeGen/ppc-emmintrin.c index 4a246ff92d76..fa3801f50a01 100644 --- a/clang/test/CodeGen/ppc-emmintrin.c +++ b/clang/test/CodeGen/ppc-emmintrin.c @@ -2,9 +2,9 @@ // REQUIRES: powerpc-registered-target // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ -// RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ -// RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE // CHECK-BE-DAG: @_mm_movemask_pd.perm_mask = internal constant <4 x i32> <i32 -2139062144, i32 -2139062144, i32 -2139062144, i32 -2139078656>, align 16 // CHECK-BE-DAG: @_mm_shuffle_epi32.permute_selectors = internal constant [4 x i32] [i32 66051, i32 67438087, i32 134810123, i32 202182159], align 4 diff --git a/clang/test/CodeGen/ppc-xmmintrin.c b/clang/test/CodeGen/ppc-xmmintrin.c index a7f6ed6e0e67..d3f18bfbb1e5 100644 --- a/clang/test/CodeGen/ppc-xmmintrin.c +++ b/clang/test/CodeGen/ppc-xmmintrin.c @@ -2,11 +2,11 @@ // REQUIRES: powerpc-registered-target // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ -// RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE // RUN: %clang -x c++ -fsyntax-only -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ -// RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE // RUN: %clang -x c++ -fsyntax-only -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns diff --git a/clang/test/Driver/fp-model.c b/clang/test/Driver/fp-model.c index c6d683e25c0b..5fa9d110dd83 100644 --- a/clang/test/Driver/fp-model.c +++ b/clang/test/Driver/fp-model.c @@ -1,90 +1,88 @@ // Test that incompatible combinations of -ffp-model= options // and other floating point options get a warning diagnostic. +// +// REQUIRES: clang-driver -// RUN: %clang -target x86_64 -### -ffp-model=fast -ffp-contract=off -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=fast -ffp-contract=off -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN %s // WARN: warning: overriding '-ffp-model=fast' option with '-ffp-contract=off' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=fast -ffp-contract=on -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=fast -ffp-contract=on -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN1 %s // WARN1: warning: overriding '-ffp-model=fast' option with '-ffp-contract=on' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fassociative-math -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fassociative-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN2 %s // WARN2: warning: overriding '-ffp-model=strict' option with '-fassociative-math' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -ffast-math -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -ffast-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN3 %s // WARN3: warning: overriding '-ffp-model=strict' option with '-ffast-math' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -ffinite-math-only -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -ffinite-math-only -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN4 %s // WARN4: warning: overriding '-ffp-model=strict' option with '-ffinite-math-only' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -ffp-contract=fast -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -ffp-contract=fast -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN5 %s // WARN5: warning: overriding '-ffp-model=strict' option with '-ffp-contract=fast' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -ffp-contract=fast -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN6 %s -// WARN6: warning: overriding '-ffp-model=strict' option with '-ffp-contract=fast' [-Woverriding-t-option] - -// RUN: %clang -target x86_64 -### -ffp-model=strict -ffp-contract=on -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -ffp-contract=on -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN7 %s // WARN7: warning: overriding '-ffp-model=strict' option with '-ffp-contract=on' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fno-honor-infinities -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fno-honor-infinities -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN8 %s // WARN8: warning: overriding '-ffp-model=strict' option with '-fno-honor-infinities' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fno-honor-nans -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fno-honor-nans -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN9 %s // WARN9: warning: overriding '-ffp-model=strict' option with '-fno-honor-nans' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fno-rounding-math -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fno-rounding-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARNa %s // WARNa: warning: overriding '-ffp-model=strict' option with '-fno-rounding-math' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fno-signed-zeros -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fno-signed-zeros -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARNb %s // WARNb: warning: overriding '-ffp-model=strict' option with '-fno-signed-zeros' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fno-trapping-math -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fno-trapping-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARNc %s // WARNc: warning: overriding '-ffp-model=strict' option with '-fno-trapping-math' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -freciprocal-math -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -freciprocal-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARNd %s // WARNd: warning: overriding '-ffp-model=strict' option with '-freciprocal-math' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -funsafe-math-optimizations -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -funsafe-math-optimizations -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARNe %s // WARNe: warning: overriding '-ffp-model=strict' option with '-funsafe-math-optimizations' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -Ofast -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -Ofast -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARNf %s // WARNf: warning: overriding '-ffp-model=strict' option with '-Ofast' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fdenormal-fp-math=preserve-sign,preserve-sign -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fdenormal-fp-math=preserve-sign,preserve-sign -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN10 %s // WARN10: warning: overriding '-ffp-model=strict' option with '-fdenormal-fp-math=preserve-sign,preserve-sign' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -c %s 2>&1 \ +// RUN: %clang -### -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-NOROUND %s // CHECK-NOROUND: "-cc1" // CHECK-NOROUND: "-fno-rounding-math" -// RUN: %clang -target x86_64 -### -frounding-math -c %s 2>&1 \ +// RUN: %clang -### -frounding-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-ROUND --implicit-check-not ffp-exception-behavior=strict %s // CHECK-ROUND: "-cc1" // CHECK-ROUND: "-frounding-math" -// RUN: %clang -target x86_64 -### -ftrapping-math -c %s 2>&1 \ +// RUN: %clang -### -ftrapping-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-TRAP %s // CHECK-TRAP: "-cc1" // CHECK-TRAP: "-ffp-exception-behavior=strict" -// RUN: %clang -target x86_64 -### -nostdinc -ffp-model=fast -c %s 2>&1 \ +// RUN: %clang -### -nostdinc -ffp-model=fast -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FPM-FAST %s // CHECK-FPM-FAST: "-cc1" // CHECK-FPM-FAST: "-menable-no-infs" @@ -98,35 +96,34 @@ // CHECK-FPM-FAST: "-ffast-math" // CHECK-FPM-FAST: "-ffinite-math-only" -// RUN: %clang -target x86_64 -### -nostdinc -ffp-model=precise -c %s 2>&1 \ +// RUN: %clang -### -nostdinc -ffp-model=precise -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FPM-PRECISE %s // CHECK-FPM-PRECISE: "-cc1" -// CHECK-FPM-PRECISE: "-ffp-contract=on" +// CHECK-FPM-PRECISE: "-ffp-contract=fast" // CHECK-FPM-PRECISE: "-fno-rounding-math" -// RUN: %clang -target x86_64 -### -nostdinc -ffp-model=strict -c %s 2>&1 \ +// RUN: %clang -### -nostdinc -ffp-model=strict -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FPM-STRICT %s // CHECK-FPM-STRICT: "-cc1" -// CHECK-FPM-STRICT: "-fmath-errno" -// CHECK-FPM-STRICT: "-ffp-contract=off" // CHECK-FPM-STRICT: "-frounding-math" // CHECK-FPM-STRICT: "-ffp-exception-behavior=strict" -// RUN: %clang -target x86_64 -### -nostdinc -ffp-exception-behavior=strict -c %s 2>&1 \ +// RUN: %clang -### -nostdinc -ffp-exception-behavior=strict -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FEB-STRICT %s // CHECK-FEB-STRICT: "-cc1" // CHECK-FEB-STRICT: "-fno-rounding-math" // CHECK-FEB-STRICT: "-ffp-exception-behavior=strict" -// RUN: %clang -target x86_64 -### -nostdinc -ffp-exception-behavior=maytrap -c %s 2>&1 \ +// RUN: %clang -### -nostdinc -ffp-exception-behavior=maytrap -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FEB-MAYTRAP %s // CHECK-FEB-MAYTRAP: "-cc1" // CHECK-FEB-MAYTRAP: "-fno-rounding-math" // CHECK-FEB-MAYTRAP: "-ffp-exception-behavior=maytrap" -// RUN: %clang -target x86_64 -### -nostdinc -ffp-exception-behavior=ignore -c %s 2>&1 \ +// RUN: %clang -### -nostdinc -ffp-exception-behavior=ignore -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FEB-IGNORE %s // CHECK-FEB-IGNORE: "-cc1" // CHECK-FEB-IGNORE: "-fno-rounding-math" // CHECK-FEB-IGNORE: "-ffp-exception-behavior=ignore" + </cut>

4 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_gcc_bootstrap/master-aarch64-bootstrap_ubsan - Build # 1 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *gcc* in CI configuration tcwg_gcc_bootstrap/master-aarch64-bootstrap_ubsan. So far, this commit has regressed CI configurations: - tcwg_gcc_bootstrap/master-aarch64-bootstrap_ubsan Culprit: <cut> commit d1819df86fbe42125cccb2fc2959a0bf51e524d6 Author: Jonathan Wright <jonathan.wright(a)arm.com> Date: Mon Aug 16 14:37:18 2021 +0100 aarch64: Remove macros for vld4[q]_lane Neon intrinsics Remove macros for vld4[q]_lane Neon intrinsics. This is a preparatory step before adding new modes for structures of Advanced SIMD vectors. gcc/ChangeLog: 2021-08-16 Jonathan Wright <jonathan.wright(a)arm.com> * config/aarch64/arm_neon.h (__LD4_LANE_FUNC): Delete. (__LD4Q_LANE_FUNC): Likewise. (vld4_lane_u8): Define without macro. (vld4_lane_u16): Likewise. (vld4_lane_u32): Likewise. (vld4_lane_u64): Likewise. (vld4_lane_s8): Likewise. (vld4_lane_s16): Likewise. (vld4_lane_s32): Likewise. (vld4_lane_s64): Likewise. (vld4_lane_f16): Likewise. (vld4_lane_f32): Likewise. (vld4_lane_f64): Likewise. (vld4_lane_p8): Likewise. (vld4_lane_p16): Likewise. (vld4_lane_p64): Likewise. (vld4q_lane_u8): Likewise. (vld4q_lane_u16): Likewise. (vld4q_lane_u32): Likewise. (vld4q_lane_u64): Likewise. (vld4q_lane_s8): Likewise. (vld4q_lane_s16): Likewise. (vld4q_lane_s32): Likewise. (vld4q_lane_s64): Likewise. (vld4q_lane_f16): Likewise. (vld4q_lane_f32): Likewise. (vld4q_lane_f64): Likewise. (vld4q_lane_p8): Likewise. (vld4q_lane_p16): Likewise. (vld4q_lane_p64): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. </cut> Results regressed to (for first_bad == d1819df86fbe42125cccb2fc2959a0bf51e524d6) # reset_artifacts: -10 # true: 0 # build_abe binutils: 1 # First few build errors in logs: # 00:10:53 make[3]: [Makefile:1769: aarch64-unknown-linux-gnu/bits/largefile-config.h] Error 1 (ignored) # 00:10:53 make[3]: [Makefile:1770: aarch64-unknown-linux-gnu/bits/largefile-config.h] Error 1 (ignored) # 00:26:15 /home/tcwg-buildslave/workspace/tcwg_gnu_2/abe/builds/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/gcc-gcc.git~master-stage2/prev-gcc/include/arm_neon.h:21081:11: error: cannot convert ‘float*’ to ‘const int*’ # 00:26:15 /home/tcwg-buildslave/workspace/tcwg_gnu_2/abe/builds/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/gcc-gcc.git~master-stage2/prev-gcc/include/arm_neon.h:21384:9: error: cannot convert ‘long int*’ to ‘const double*’ # 00:26:16 make[3]: *** [Makefile:226: lex.o] Error 1 # 00:26:30 make[2]: *** [Makefile:9758: all-stage2-libcpp] Error 2 # 00:28:15 make[1]: *** [Makefile:25899: stage2-bubble] Error 2 # 00:28:15 make: *** [Makefile:1010: all] Error 2 from (for last_good == 08f83812e5c5fdd9a7a4a1b9e46bb33725185c5a) # reset_artifacts: -10 # true: 0 # build_abe binutils: 1 # build_abe bootstrap_ubsan: 2 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-aarch64-bootstra… Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-aarch64-bootstra… Build top page/logs: https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-aarch64-bootstra… Configuration details: Reproduce builds: <cut> mkdir investigate-gcc-d1819df86fbe42125cccb2fc2959a0bf51e524d6 cd investigate-gcc-d1819df86fbe42125cccb2fc2959a0bf51e524d6 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-aarch64-bootstra… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-aarch64-bootstra… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-aarch64-bootstra… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_gnu-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /gcc/ ./ ./bisect/baseline/ cd gcc # Reproduce first_bad build git checkout --detach d1819df86fbe42125cccb2fc2959a0bf51e524d6 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 08f83812e5c5fdd9a7a4a1b9e46bb33725185c5a ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-aarch64-bootstra… Build log: https://ci.linaro.org/job/tcwg_gcc_bootstrap-bisect-master-aarch64-bootstra… Full commit (up to 1000 lines): <cut> commit d1819df86fbe42125cccb2fc2959a0bf51e524d6 Author: Jonathan Wright <jonathan.wright(a)arm.com> Date: Mon Aug 16 14:37:18 2021 +0100 aarch64: Remove macros for vld4[q]_lane Neon intrinsics Remove macros for vld4[q]_lane Neon intrinsics. This is a preparatory step before adding new modes for structures of Advanced SIMD vectors. gcc/ChangeLog: 2021-08-16 Jonathan Wright <jonathan.wright(a)arm.com> * config/aarch64/arm_neon.h (__LD4_LANE_FUNC): Delete. (__LD4Q_LANE_FUNC): Likewise. (vld4_lane_u8): Define without macro. (vld4_lane_u16): Likewise. (vld4_lane_u32): Likewise. (vld4_lane_u64): Likewise. (vld4_lane_s8): Likewise. (vld4_lane_s16): Likewise. (vld4_lane_s32): Likewise. (vld4_lane_s64): Likewise. (vld4_lane_f16): Likewise. (vld4_lane_f32): Likewise. (vld4_lane_f64): Likewise. (vld4_lane_p8): Likewise. (vld4_lane_p16): Likewise. (vld4_lane_p64): Likewise. (vld4q_lane_u8): Likewise. (vld4q_lane_u16): Likewise. (vld4q_lane_u32): Likewise. (vld4q_lane_u64): Likewise. (vld4q_lane_s8): Likewise. (vld4q_lane_s16): Likewise. (vld4q_lane_s32): Likewise. (vld4q_lane_s64): Likewise. (vld4q_lane_f16): Likewise. (vld4q_lane_f32): Likewise. (vld4q_lane_f64): Likewise. (vld4q_lane_p8): Likewise. (vld4q_lane_p16): Likewise. (vld4q_lane_p64): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. --- gcc/config/aarch64/arm_neon.h | 728 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 624 insertions(+), 104 deletions(-) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 29b62988a91..d8b29706a20 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -20856,110 +20856,595 @@ vld3q_lane_p64 (const poly64_t * __ptr, poly64x2x3_t __b, const int __c) /* vld4_lane */ -#define __LD4_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \ - qmode, ptrmode, funcsuffix, signedtype) \ -__extension__ extern __inline intype \ -__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ -vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ -{ \ - __builtin_aarch64_simd_xi __o; \ - largetype __temp; \ - __temp.val[0] = \ - vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \ - __temp.val[1] = \ - vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \ - __temp.val[2] = \ - vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); \ - __temp.val[3] = \ - vcombine_##funcsuffix (__b.val[3], vcreate_##funcsuffix (0)); \ - __o = __builtin_aarch64_set_qregxi##qmode (__o, \ - (signedtype) __temp.val[0], \ - 0); \ - __o = __builtin_aarch64_set_qregxi##qmode (__o, \ - (signedtype) __temp.val[1], \ - 1); \ - __o = __builtin_aarch64_set_qregxi##qmode (__o, \ - (signedtype) __temp.val[2], \ - 2); \ - __o = __builtin_aarch64_set_qregxi##qmode (__o, \ - (signedtype) __temp.val[3], \ - 3); \ - __o = __builtin_aarch64_ld4_lane##mode ( \ - (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ - __b.val[0] = (vectype) __builtin_aarch64_get_dregxidi (__o, 0); \ - __b.val[1] = (vectype) __builtin_aarch64_get_dregxidi (__o, 1); \ - __b.val[2] = (vectype) __builtin_aarch64_get_dregxidi (__o, 2); \ - __b.val[3] = (vectype) __builtin_aarch64_get_dregxidi (__o, 3); \ - return __b; \ +__extension__ extern __inline uint8x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_u8 (const uint8_t * __ptr, uint8x8x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + uint8x16x4_t __temp; + __temp.val[0] = vcombine_u8 (__b.val[0], vcreate_u8 (0)); + __temp.val[1] = vcombine_u8 (__b.val[1], vcreate_u8 (0)); + __temp.val[2] = vcombine_u8 (__b.val[2], vcreate_u8 (0)); + __temp.val[3] = vcombine_u8 (__b.val[3], vcreate_u8 (0)); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev8qi ( + (__builtin_aarch64_simd_qi *) __ptr, __o, __c); + __b.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; } -/* vld4q_lane */ +__extension__ extern __inline uint16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_u16 (const uint16_t * __ptr, uint16x4x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + uint16x8x4_t __temp; + __temp.val[0] = vcombine_u16 (__b.val[0], vcreate_u16 (0)); + __temp.val[1] = vcombine_u16 (__b.val[1], vcreate_u16 (0)); + __temp.val[2] = vcombine_u16 (__b.val[2], vcreate_u16 (0)); + __temp.val[3] = vcombine_u16 (__b.val[3], vcreate_u16 (0)); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev4hi ( + (__builtin_aarch64_simd_hi *) __ptr, __o, __c); + __b.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline uint32x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_u32 (const uint32_t * __ptr, uint32x2x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + uint32x4x4_t __temp; + __temp.val[0] = vcombine_u32 (__b.val[0], vcreate_u32 (0)); + __temp.val[1] = vcombine_u32 (__b.val[1], vcreate_u32 (0)); + __temp.val[2] = vcombine_u32 (__b.val[2], vcreate_u32 (0)); + __temp.val[3] = vcombine_u32 (__b.val[3], vcreate_u32 (0)); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev2si ( + (__builtin_aarch64_simd_si *) __ptr, __o, __c); + __b.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline uint64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_u64 (const uint64_t * __ptr, uint64x1x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + uint64x2x4_t __temp; + __temp.val[0] = vcombine_u64 (__b.val[0], vcreate_u64 (0)); + __temp.val[1] = vcombine_u64 (__b.val[1], vcreate_u64 (0)); + __temp.val[2] = vcombine_u64 (__b.val[2], vcreate_u64 (0)); + __temp.val[3] = vcombine_u64 (__b.val[3], vcreate_u64 (0)); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanedi ( + (__builtin_aarch64_simd_di *) __ptr, __o, __c); + __b.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline int8x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_s8 (const int8_t * __ptr, int8x8x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + int8x16x4_t __temp; + __temp.val[0] = vcombine_s8 (__b.val[0], vcreate_s8 (0)); + __temp.val[1] = vcombine_s8 (__b.val[1], vcreate_s8 (0)); + __temp.val[2] = vcombine_s8 (__b.val[2], vcreate_s8 (0)); + __temp.val[3] = vcombine_s8 (__b.val[3], vcreate_s8 (0)); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev8qi ( + (__builtin_aarch64_simd_qi *) __ptr, __o, __c); + __b.val[0] = (int8x8_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (int8x8_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (int8x8_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (int8x8_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline int16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_s16 (const int16_t * __ptr, int16x4x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + int16x8x4_t __temp; + __temp.val[0] = vcombine_s16 (__b.val[0], vcreate_s16 (0)); + __temp.val[1] = vcombine_s16 (__b.val[1], vcreate_s16 (0)); + __temp.val[2] = vcombine_s16 (__b.val[2], vcreate_s16 (0)); + __temp.val[3] = vcombine_s16 (__b.val[3], vcreate_s16 (0)); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev4hi ( + (__builtin_aarch64_simd_hi *) __ptr, __o, __c); + __b.val[0] = (int16x4_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (int16x4_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (int16x4_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (int16x4_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline int32x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_s32 (const int32_t * __ptr, int32x2x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + int32x4x4_t __temp; + __temp.val[0] = vcombine_s32 (__b.val[0], vcreate_s32 (0)); + __temp.val[1] = vcombine_s32 (__b.val[1], vcreate_s32 (0)); + __temp.val[2] = vcombine_s32 (__b.val[2], vcreate_s32 (0)); + __temp.val[3] = vcombine_s32 (__b.val[3], vcreate_s32 (0)); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev2si ( + (__builtin_aarch64_simd_si *) __ptr, __o, __c); + __b.val[0] = (int32x2_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (int32x2_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (int32x2_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (int32x2_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline int64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_s64 (const int64_t * __ptr, int64x1x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + int64x2x4_t __temp; + __temp.val[0] = vcombine_s64 (__b.val[0], vcreate_s64 (0)); + __temp.val[1] = vcombine_s64 (__b.val[1], vcreate_s64 (0)); + __temp.val[2] = vcombine_s64 (__b.val[2], vcreate_s64 (0)); + __temp.val[3] = vcombine_s64 (__b.val[3], vcreate_s64 (0)); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanedi ( + (__builtin_aarch64_simd_di *) __ptr, __o, __c); + __b.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline float16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_f16 (const float16_t * __ptr, float16x4x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + float16x8x4_t __temp; + __temp.val[0] = vcombine_f16 (__b.val[0], vcreate_f16 (0)); + __temp.val[1] = vcombine_f16 (__b.val[1], vcreate_f16 (0)); + __temp.val[2] = vcombine_f16 (__b.val[2], vcreate_f16 (0)); + __temp.val[3] = vcombine_f16 (__b.val[3], vcreate_f16 (0)); + __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev4hf ( + (__builtin_aarch64_simd_hf *) __ptr, __o, __c); + __b.val[0] = (float16x4_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (float16x4_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (float16x4_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (float16x4_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline float32x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_f32 (const float32_t * __ptr, float32x2x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + float32x4x4_t __temp; + __temp.val[0] = vcombine_f32 (__b.val[0], vcreate_f32 (0)); + __temp.val[1] = vcombine_f32 (__b.val[1], vcreate_f32 (0)); + __temp.val[2] = vcombine_f32 (__b.val[2], vcreate_f32 (0)); + __temp.val[3] = vcombine_f32 (__b.val[3], vcreate_f32 (0)); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev2si ( + (__builtin_aarch64_simd_sf *) __ptr, __o, __c); + __b.val[0] = (float32x2_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (float32x2_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (float32x2_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (float32x2_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline float64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_f64 (const float64_t * __ptr, float64x1x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + float64x2x4_t __temp; + __temp.val[0] = vcombine_f64 (__b.val[0], vcreate_f64 (0)); + __temp.val[1] = vcombine_f64 (__b.val[1], vcreate_f64 (0)); + __temp.val[2] = vcombine_f64 (__b.val[2], vcreate_f64 (0)); + __temp.val[3] = vcombine_f64 (__b.val[3], vcreate_f64 (0)); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanedf ( + (__builtin_aarch64_simd_df *) __ptr, __o, __c); + __b.val[0] = (float64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (float64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (float64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (float64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline poly8x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_p8 (const poly8_t * __ptr, poly8x8x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + poly8x16x4_t __temp; + __temp.val[0] = vcombine_p8 (__b.val[0], vcreate_p8 (0)); + __temp.val[1] = vcombine_p8 (__b.val[1], vcreate_p8 (0)); + __temp.val[2] = vcombine_p8 (__b.val[2], vcreate_p8 (0)); + __temp.val[3] = vcombine_p8 (__b.val[3], vcreate_p8 (0)); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev8qi ( + (__builtin_aarch64_simd_qi *) __ptr, __o, __c); + __b.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} -__LD4_LANE_FUNC (float16x4x4_t, float16x4_t, float16x8x4_t, float16_t, v4hf, - v8hf, hf, f16, float16x8_t) -__LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v2sf, v4sf, - sf, f32, float32x4_t) -__LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, df, v2df, - df, f64, float64x2_t) -__LD4_LANE_FUNC (poly8x8x4_t, poly8x8_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8, - int8x16_t) -__LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, - p16, int16x8_t) -__LD4_LANE_FUNC (poly64x1x4_t, poly64x1_t, poly64x2x4_t, poly64_t, di, - v2di_ssps, di, p64, poly64x2_t) -__LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8, - int8x16_t) -__LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16, - int16x8_t) -__LD4_LANE_FUNC (int32x2x4_t, int32x2_t, int32x4x4_t, int32_t, v2si, v4si, si, s32, - int32x4_t) -__LD4_LANE_FUNC (int64x1x4_t, int64x1_t, int64x2x4_t, int64_t, di, v2di, di, s64, - int64x2_t) -__LD4_LANE_FUNC (uint8x8x4_t, uint8x8_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8, - int8x16_t) -__LD4_LANE_FUNC (uint16x4x4_t, uint16x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, - u16, int16x8_t) -__LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v2si, v4si, si, - u32, int32x4_t) -__LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, di, v2di, di, - u64, int64x2_t) +__extension__ extern __inline poly16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_p16 (const poly16_t * __ptr, poly16x4x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + poly16x8x4_t __temp; + __temp.val[0] = vcombine_p16 (__b.val[0], vcreate_p16 (0)); + __temp.val[1] = vcombine_p16 (__b.val[1], vcreate_p16 (0)); + __temp.val[2] = vcombine_p16 (__b.val[2], vcreate_p16 (0)); + __temp.val[3] = vcombine_p16 (__b.val[3], vcreate_p16 (0)); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev4hi ( + (__builtin_aarch64_simd_hi *) __ptr, __o, __c); + __b.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline poly64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_p64 (const poly64_t * __ptr, poly64x1x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + poly64x2x4_t __temp; + __temp.val[0] = vcombine_p64 (__b.val[0], vcreate_p64 (0)); + __temp.val[1] = vcombine_p64 (__b.val[1], vcreate_p64 (0)); + __temp.val[2] = vcombine_p64 (__b.val[2], vcreate_p64 (0)); + __temp.val[3] = vcombine_p64 (__b.val[3], vcreate_p64 (0)); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanedi ( + (__builtin_aarch64_simd_di *) __ptr, __o, __c); + __b.val[0] = (poly64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (poly64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (poly64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (poly64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} /* vld4q_lane */ -#define __LD4Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \ -__extension__ extern __inline intype \ -__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ -vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ -{ \ - __builtin_aarch64_simd_xi __o; \ - intype ret; \ - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); \ - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); \ - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); \ - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); \ - __o = __builtin_aarch64_ld4_lane##mode ( \ - (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ - ret.val[0] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 0); \ - ret.val[1] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 1); \ - ret.val[2] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 2); \ - ret.val[3] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 3); \ - return ret; \ -} - -__LD4Q_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16) -__LD4Q_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32) -__LD4Q_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64) -__LD4Q_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8) -__LD4Q_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16) -__LD4Q_LANE_FUNC (poly64x2x4_t, poly64x2_t, poly64_t, v2di, di, p64) -__LD4Q_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8) -__LD4Q_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16) -__LD4Q_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32) -__LD4Q_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64) -__LD4Q_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8) -__LD4Q_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16) -__LD4Q_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32) -__LD4Q_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64) +__extension__ extern __inline uint8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_u8 (const uint8_t * __ptr, uint8x16x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + uint8x16x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev16qi ( + (__builtin_aarch64_simd_qi *) __ptr, __o, __c); + ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline uint16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_u16 (const uint16_t * __ptr, uint16x8x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + uint16x8x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev8hi ( + (__builtin_aarch64_simd_hi *) __ptr, __o, __c); + ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline uint32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_u32 (const uint32_t * __ptr, uint32x4x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + uint32x4x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev4si ( + (__builtin_aarch64_simd_si *) __ptr, __o, __c); + ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline uint64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_u64 (const uint64_t * __ptr, uint64x2x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + uint64x2x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev2di ( + (__builtin_aarch64_simd_di *) __ptr, __o, __c); + ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline int8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_s8 (const int8_t * __ptr, int8x16x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + int8x16x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev16qi ( + (__builtin_aarch64_simd_qi *) __ptr, __o, __c); + ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline int16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_s16 (const int16_t * __ptr, int16x8x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + int16x8x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev8hi ( + (__builtin_aarch64_simd_hi *) __ptr, __o, __c); + ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline int32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_s32 (const int32_t * __ptr, int32x4x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + int32x4x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev4si ( + (__builtin_aarch64_simd_si *) __ptr, __o, __c); + ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline int64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_s64 (const int64_t * __ptr, int64x2x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + int64x2x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev2di ( + (__builtin_aarch64_simd_di *) __ptr, __o, __c); + ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline float16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_f16 (const float16_t * __ptr, float16x8x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + float16x8x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev8hf ( + (__builtin_aarch64_simd_hf *) __ptr, __o, __c); + ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (float16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline float32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_f32 (const float32_t * __ptr, float32x4x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + float32x4x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev4sf ( + (__builtin_aarch64_simd_sf *) __ptr, __o, __c); + ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline float64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_f64 (const float64_t * __ptr, float64x2x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + float64x2x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev2df ( + (__builtin_aarch64_simd_di *) __ptr, __o, __c); + ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline poly8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_p8 (const poly8_t * __ptr, poly8x16x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + poly8x16x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev16qi ( + (__builtin_aarch64_simd_qi *) __ptr, __o, __c); + ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline poly16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_p16 (const poly16_t * __ptr, poly16x8x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + poly16x8x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev8hi ( + (__builtin_aarch64_simd_hi *) __ptr, __o, __c); + ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ extern __inline poly64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_p64 (const poly64_t * __ptr, poly64x2x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + poly64x2x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev2di ( + (__builtin_aarch64_simd_di *) __ptr, __o, __c); + ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (poly64x2_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} /* vmax */ @@ -35441,9 +35926,47 @@ vld3q_lane_bf16 (const bfloat16_t * __ptr, bfloat16x8x3_t __b, const int __c) return ret; } -__LD4_LANE_FUNC (bfloat16x4x4_t, bfloat16x4_t, bfloat16x8x4_t, bfloat16_t, v4bf, - v8bf, bf, bf16, bfloat16x8_t) -__LD4Q_LANE_FUNC (bfloat16x8x4_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16) +__extension__ extern __inline bfloat16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_bf16 (const bfloat16_t * __ptr, bfloat16x4x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + bfloat16x8x4_t __temp; + __temp.val[0] = vcombine_bf16 (__b.val[0], vcreate_bf16 (0)); + __temp.val[1] = vcombine_bf16 (__b.val[1], vcreate_bf16 (0)); + __temp.val[2] = vcombine_bf16 (__b.val[2], vcreate_bf16 (0)); + __temp.val[3] = vcombine_bf16 (__b.val[3], vcreate_bf16 (0)); + __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[3], 3); + __o = __builtin_aarch64_ld4_lanev4bf ( + (__builtin_aarch64_simd_bf *) __ptr, __o, __c); + __b.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregxidi (__o, 0); + __b.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregxidi (__o, 1); + __b.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregxidi (__o, 2); + __b.val[3] = (bfloat16x4_t) __builtin_aarch64_get_dregxidi (__o, 3); + return __b; +} + +__extension__ extern __inline bfloat16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_bf16 (const bfloat16_t * __ptr, bfloat16x8x4_t __b, const int __c) +{ + __builtin_aarch64_simd_xi __o; + bfloat16x8x4_t ret; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); + __o = __builtin_aarch64_ld4_lanev8bf ( + (__builtin_aarch64_simd_bf *) __ptr, __o, __c); + ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -35739,7 +36262,4 @@ vaddq_p128 (poly128_t __a, poly128_t __b) #undef __aarch64_vdupq_laneq_u32 #undef __aarch64_vdupq_laneq_u64 -#undef __LD4_LANE_FUNC -#undef __LD4Q_LANE_FUNC - #endif </cut>

4 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_kernel/llvm-release-arm-next-allmodconfig - Build # 31 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *linux* in CI configuration tcwg_kernel/llvm-release-arm-next-allmodconfig. So far, this commit has regressed CI configurations: - tcwg_kernel/llvm-release-arm-next-allmodconfig Culprit: <cut> commit fad7cd3310db3099f95dd34312c77740fbc455e5 Author: Baokun Li <libaokun1(a)huawei.com> Date: Wed Aug 4 10:12:12 2021 +0800 nbd: add the check to prevent overflow in __nbd_ioctl() If user specify a large enough value of NBD blocks option, it may trigger signed integer overflow which may lead to nbd->config->bytesize becomes a large or small value, zero in particular. UBSAN: Undefined behaviour in drivers/block/nbd.c:325:31 signed integer overflow: 1024 * 4611686155866341414 cannot be represented in type 'long long int' [...] Call trace: [...] handle_overflow+0x188/0x1dc lib/ubsan.c:192 __ubsan_handle_mul_overflow+0x34/0x44 lib/ubsan.c:213 nbd_size_set drivers/block/nbd.c:325 [inline] __nbd_ioctl drivers/block/nbd.c:1342 [inline] nbd_ioctl+0x998/0xa10 drivers/block/nbd.c:1395 __blkdev_driver_ioctl block/ioctl.c:311 [inline] [...] Although it is not a big deal, still silence the UBSAN by limit the input value. Reported-by: Hulk Robot <hulkci(a)huawei.com> Signed-off-by: Baokun Li <libaokun1(a)huawei.com> Reviewed-by: Josef Bacik <josef(a)toxicpanda.com> Link: https://lore.kernel.org/r/20210804021212.990223-1-libaokun1@huawei.com [axboe: dropped unlikely()] Signed-off-by: Jens Axboe <axboe(a)kernel.dk> </cut> Results regressed to (for first_bad == fad7cd3310db3099f95dd34312c77740fbc455e5) # reset_artifacts: -10 # build_abe binutils: -9 # build_llvm: -5 # build_abe qemu: -2 # linux_n_obj: 21709 # First few build errors in logs: # 00:07:12 make[1]: *** [modules-only.symvers] Error 1 # 00:07:12 make: *** [modules] Error 2 from (for last_good == da20b58d5bbbb0d23ae9530992a37d0f0d1787a4) # reset_artifacts: -10 # build_abe binutils: -9 # build_llvm: -5 # build_abe qemu: -2 # linux_n_obj: 29751 # linux build successful: all Artifacts of last_good build: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-release-arm-next-all… Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-release-arm-next-all… Build top page/logs: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-release-arm-next-all… Configuration details: rr[linux_git]="https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git#ecf9343…" Reproduce builds: <cut> mkdir investigate-linux-fad7cd3310db3099f95dd34312c77740fbc455e5 cd investigate-linux-fad7cd3310db3099f95dd34312c77740fbc455e5 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-release-arm-next-all… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-release-arm-next-all… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-release-arm-next-all… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_kernel-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /linux/ ./ ./bisect/baseline/ cd linux # Reproduce first_bad build git checkout --detach fad7cd3310db3099f95dd34312c77740fbc455e5 ../artifacts/test.sh # Reproduce last_good build git checkout --detach da20b58d5bbbb0d23ae9530992a37d0f0d1787a4 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-release-arm-next-all… Build log: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-release-arm-next-all… Full commit (up to 1000 lines): <cut> commit fad7cd3310db3099f95dd34312c77740fbc455e5 Author: Baokun Li <libaokun1(a)huawei.com> Date: Wed Aug 4 10:12:12 2021 +0800 nbd: add the check to prevent overflow in __nbd_ioctl() If user specify a large enough value of NBD blocks option, it may trigger signed integer overflow which may lead to nbd->config->bytesize becomes a large or small value, zero in particular. UBSAN: Undefined behaviour in drivers/block/nbd.c:325:31 signed integer overflow: 1024 * 4611686155866341414 cannot be represented in type 'long long int' [...] Call trace: [...] handle_overflow+0x188/0x1dc lib/ubsan.c:192 __ubsan_handle_mul_overflow+0x34/0x44 lib/ubsan.c:213 nbd_size_set drivers/block/nbd.c:325 [inline] __nbd_ioctl drivers/block/nbd.c:1342 [inline] nbd_ioctl+0x998/0xa10 drivers/block/nbd.c:1395 __blkdev_driver_ioctl block/ioctl.c:311 [inline] [...] Although it is not a big deal, still silence the UBSAN by limit the input value. Reported-by: Hulk Robot <hulkci(a)huawei.com> Signed-off-by: Baokun Li <libaokun1(a)huawei.com> Reviewed-by: Josef Bacik <josef(a)toxicpanda.com> Link: https://lore.kernel.org/r/20210804021212.990223-1-libaokun1@huawei.com [axboe: dropped unlikely()] Signed-off-by: Jens Axboe <axboe(a)kernel.dk> --- drivers/block/nbd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index c38317979f74..f82264835794 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1384,6 +1384,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, unsigned int cmd, unsigned long arg) { struct nbd_config *config = nbd->config; + loff_t bytesize; switch (cmd) { case NBD_DISCONNECT: @@ -1398,8 +1399,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, case NBD_SET_SIZE: return nbd_set_size(nbd, arg, config->blksize); case NBD_SET_SIZE_BLOCKS: - return nbd_set_size(nbd, arg * config->blksize, - config->blksize); + if (check_mul_overflow((loff_t)arg, config->blksize, &bytesize)) + return -EINVAL; + return nbd_set_size(nbd, bytesize, config->blksize); case NBD_SET_TIMEOUT: nbd_set_cmd_timeout(nbd, arg); return 0; </cut>

4 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tx1/llvm-release-aarch64-spec2k6-O3 - Build # 7 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tx1/llvm-release-aarch64-spec2k6-O3. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tx1/llvm-release-aarch64-spec2k6-O3 Culprit: <cut> commit 6998f8ae2d14e096aff33968f226587b5c1a193a Author: David Sherwood <david.sherwood(a)arm.com> Date: Wed Mar 10 08:34:19 2021 +0000 [LoopVectorize] Simplify scalar cost calculation in getInstructionCost This patch simplifies the calculation of certain costs in getInstructionCost when isScalarAfterVectorization() returns a true value. There are a few places where we multiply a cost by a number N, i.e. unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1; return N * TTI.getArithmeticInstrCost(... After some investigation it seems that there are only these cases that occur in practice: 1. VF is a scalar, in which case N = 1. 2. VF is a vector. We can only get here if: a) the instruction is a GEP/bitcast/PHI with scalar uses, or b) this is an update to an induction variable that remains scalar. I have changed the code so that N is assumed to always be 1. For GEPs the cost is always 0, since this is calculated later on as part of the load/store cost. PHI nodes are costed separately and were never previously multiplied by VF. For all other cases I have added an assert that none of the users needs scalarising, which didn't fire in any unit tests. Only one test required fixing and I believe the original cost for the scalar add instruction to have been wrong, since only one copy remains after vectorisation. I have also added a new test for the case when a pointer PHI feeds directly into a store that will be scalarised as we were previously never testing it. Differential Revision: https://reviews.llvm.org/D99718 </cut> Results regressed to (for first_bad == 6998f8ae2d14e096aff33968f226587b5c1a193a) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3 artifacts/build-6998f8ae2d14e096aff33968f226587b5c1a193a/results_id: 1 # 462.libquantum,libquantum_base.default regressed by 114 # 462.libquantum,[.] quantum_toffoli regressed by 123 # 462.libquantum,[.] quantum_cnot regressed by 115 from (for last_good == c835630c25a4f9925517949579f66a43b113fbc9) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3 artifacts/build-c835630c25a4f9925517949579f66a43b113fbc9/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Results ID of last_good: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-release-aarch64-spec2k6-O3/3744 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Results ID of first_bad: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-release-aarch64-spec2k6-O3/3755 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-6998f8ae2d14e096aff33968f226587b5c1a193a cd investigate-llvm-6998f8ae2d14e096aff33968f226587b5c1a193a git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach 6998f8ae2d14e096aff33968f226587b5c1a193a ../artifacts/test.sh # Reproduce last_good build git checkout --detach c835630c25a4f9925517949579f66a43b113fbc9 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Full commit (up to 1000 lines): <cut> commit 6998f8ae2d14e096aff33968f226587b5c1a193a Author: David Sherwood <david.sherwood(a)arm.com> Date: Wed Mar 10 08:34:19 2021 +0000 [LoopVectorize] Simplify scalar cost calculation in getInstructionCost This patch simplifies the calculation of certain costs in getInstructionCost when isScalarAfterVectorization() returns a true value. There are a few places where we multiply a cost by a number N, i.e. unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1; return N * TTI.getArithmeticInstrCost(... After some investigation it seems that there are only these cases that occur in practice: 1. VF is a scalar, in which case N = 1. 2. VF is a vector. We can only get here if: a) the instruction is a GEP/bitcast/PHI with scalar uses, or b) this is an update to an induction variable that remains scalar. I have changed the code so that N is assumed to always be 1. For GEPs the cost is always 0, since this is calculated later on as part of the load/store cost. PHI nodes are costed separately and were never previously multiplied by VF. For all other cases I have added an assert that none of the users needs scalarising, which didn't fire in any unit tests. Only one test required fixing and I believe the original cost for the scalar add instruction to have been wrong, since only one copy remains after vectorisation. I have also added a new test for the case when a pointer PHI feeds directly into a store that will be scalarised as we were previously never testing it. Differential Revision: https://reviews.llvm.org/D99718 --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 73 +++++++++++++--------- .../AArch64/no_vector_instructions.ll | 2 +- .../LoopVectorize/AArch64/predication_costs.ll | 35 +++++++++++ .../Transforms/LoopVectorize/scalarized-bitcast.ll | 40 ++++++++++++ 4 files changed, 121 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 2b413fc49505..f25af23c86c2 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7383,10 +7383,39 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, Type *RetTy = I->getType(); if (canTruncateToMinimalBitwidth(I, VF)) RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]); - VectorTy = isScalarAfterVectorization(I, VF) ? RetTy : ToVectorTy(RetTy, VF); auto SE = PSE.getSE(); TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + auto hasSingleCopyAfterVectorization = [this](Instruction *I, + ElementCount VF) -> bool { + if (VF.isScalar()) + return true; + + auto Scalarized = InstsToScalarize.find(VF); + assert(Scalarized != InstsToScalarize.end() && + "VF not yet analyzed for scalarization profitability"); + return !Scalarized->second.count(I) && + llvm::all_of(I->users(), [&](User *U) { + auto *UI = cast<Instruction>(U); + return !Scalarized->second.count(UI); + }); + }; + + if (isScalarAfterVectorization(I, VF)) { + // With the exception of GEPs and PHIs, after scalarization there should + // only be one copy of the instruction generated in the loop. This is + // because the VF is either 1, or any instructions that need scalarizing + // have already been dealt with by the the time we get here. As a result, + // it means we don't have to multiply the instruction cost by VF. + assert(I->getOpcode() == Instruction::GetElementPtr || + I->getOpcode() == Instruction::PHI || + (I->getOpcode() == Instruction::BitCast && + I->getType()->isPointerTy()) || + hasSingleCopyAfterVectorization(I, VF)); + VectorTy = RetTy; + } else + VectorTy = ToVectorTy(RetTy, VF); + // TODO: We need to estimate the cost of intrinsic calls. switch (I->getOpcode()) { case Instruction::GetElementPtr: @@ -7514,21 +7543,16 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, Op2VK = TargetTransformInfo::OK_UniformValue; SmallVector<const Value *, 4> Operands(I->operand_values()); - unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1; - return N * TTI.getArithmeticInstrCost( - I->getOpcode(), VectorTy, CostKind, - TargetTransformInfo::OK_AnyValue, - Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I); + return TTI.getArithmeticInstrCost( + I->getOpcode(), VectorTy, CostKind, TargetTransformInfo::OK_AnyValue, + Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I); } case Instruction::FNeg: { assert(!VF.isScalable() && "VF is assumed to be non scalable."); - unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1; - return N * TTI.getArithmeticInstrCost( - I->getOpcode(), VectorTy, CostKind, - TargetTransformInfo::OK_AnyValue, - TargetTransformInfo::OK_AnyValue, - TargetTransformInfo::OP_None, TargetTransformInfo::OP_None, - I->getOperand(0), I); + return TTI.getArithmeticInstrCost( + I->getOpcode(), VectorTy, CostKind, TargetTransformInfo::OK_AnyValue, + TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None, + TargetTransformInfo::OP_None, I->getOperand(0), I); } case Instruction::Select: { SelectInst *SI = cast<SelectInst>(I); @@ -7583,6 +7607,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, VectorTy = ToVectorTy(getMemInstValueType(I), Width); return getMemoryInstructionCost(I, VF); } + case Instruction::BitCast: + if (I->getType()->isPointerTy()) + return 0; + LLVM_FALLTHROUGH; case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: @@ -7593,8 +7621,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, case Instruction::SIToFP: case Instruction::UIToFP: case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: { + case Instruction::FPTrunc: { // Computes the CastContextHint from a Load/Store instruction. auto ComputeCCH = [&](Instruction *I) -> TTI::CastContextHint { assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && @@ -7672,14 +7699,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, } } - unsigned N; - if (isScalarAfterVectorization(I, VF)) { - assert(!VF.isScalable() && "VF is assumed to be non scalable"); - N = VF.getKnownMinValue(); - } else - N = 1; - return N * - TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I); + return TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I); } case Instruction::Call: { bool NeedToScalarize; @@ -7694,11 +7714,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, case Instruction::ExtractValue: return TTI.getInstructionCost(I, TTI::TCK_RecipThroughput); default: - // The cost of executing VF copies of the scalar instruction. This opcode - // is unknown. Assume that it is the same as 'mul'. - return VF.getKnownMinValue() * TTI.getArithmeticInstrCost( - Instruction::Mul, VectorTy, CostKind) + - getScalarizationOverhead(I, VF); + // This opcode is unknown. Assume that it is the same as 'mul'. + return TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind); } // end of switch. } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll index 247ea35ff5d0..3061998518ad 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll @@ -6,7 +6,7 @@ target triple = "aarch64--linux-gnu" ; CHECK-LABEL: all_scalar ; CHECK: LV: Found scalar instruction: %i.next = add nuw nsw i64 %i, 2 -; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %i.next = add nuw nsw i64 %i, 2 +; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %i.next = add nuw nsw i64 %i, 2 ; CHECK: LV: Not considering vector loop of width 2 because it will not generate any vector instructions ; define void @all_scalar(i64* %a, i64 %n) { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll index b0ebb4edf2ad..858b28ddd321 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll @@ -86,6 +86,41 @@ for.end: ret void } +; CHECK-LABEL: predicated_store_phi +; +; Same as predicate_store except we use a pointer PHI to maintain the address +; +; CHECK: Found new scalar instruction: %addr = phi i32* [ %a, %entry ], [ %addr.next, %for.inc ] +; CHECK: Found new scalar instruction: %addr.next = getelementptr inbounds i32, i32* %addr, i64 1 +; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %addr, align 4 +; CHECK: Found an estimated cost of 0 for VF 2 For instruction: %addr = phi i32* [ %a, %entry ], [ %addr.next, %for.inc ] +; CHECK: Found an estimated cost of 3 for VF 2 For instruction: store i32 %tmp2, i32* %addr, align 4 +; +define void @predicated_store_phi(i32* %a, i1 %c, i32 %x, i64 %n) { +entry: + br label %for.body + +for.body: + %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] + %addr = phi i32 * [ %a, %entry ], [ %addr.next, %for.inc ] + %tmp1 = load i32, i32* %addr, align 4 + %tmp2 = add nsw i32 %tmp1, %x + br i1 %c, label %if.then, label %for.inc + +if.then: + store i32 %tmp2, i32* %addr, align 4 + br label %for.inc + +for.inc: + %i.next = add nuw nsw i64 %i, 1 + %cond = icmp slt i64 %i.next, %n + %addr.next = getelementptr inbounds i32, i32* %addr, i64 1 + br i1 %cond, label %for.body, label %for.end + +for.end: + ret void +} + ; CHECK-LABEL: predicated_udiv_scalarized_operand ; ; This test checks that we correctly compute the cost of the predicated udiv diff --git a/llvm/test/Transforms/LoopVectorize/scalarized-bitcast.ll b/llvm/test/Transforms/LoopVectorize/scalarized-bitcast.ll new file mode 100644 index 000000000000..0c97e6ac475e --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/scalarized-bitcast.ll @@ -0,0 +1,40 @@ +; REQUIRES: asserts +; RUN: opt -loop-vectorize -force-vector-width=2 -debug-only=loop-vectorize -S -o - < %s 2>&1 | FileCheck %s + +%struct.foo = type { i32, i64 } + +; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %0 = bitcast i64* %b to i32* + +; The bitcast below will be scalarized due to the predication in the loop. Bitcasts +; between pointer types should be treated as free, despite the scalarization. +define void @foo(%struct.foo* noalias nocapture %in, i32* noalias nocapture readnone %out, i64 %n) { +entry: + br label %for.body + +for.body: ; preds = %entry, %if.end + %i.012 = phi i64 [ %inc, %if.end ], [ 0, %entry ] + %b = getelementptr inbounds %struct.foo, %struct.foo* %in, i64 %i.012, i32 1 + %0 = bitcast i64* %b to i32* + %a = getelementptr inbounds %struct.foo, %struct.foo* %in, i64 %i.012, i32 0 + %1 = load i32, i32* %a, align 8 + %tobool.not = icmp eq i32 %1, 0 + br i1 %tobool.not, label %if.end, label %land.lhs.true + +land.lhs.true: ; preds = %for.body + %2 = load i32, i32* %0, align 4 + %cmp2 = icmp sgt i32 %2, 0 + br i1 %cmp2, label %if.then, label %if.end + +if.then: ; preds = %land.lhs.true + %sub = add nsw i32 %2, -1 + store i32 %sub, i32* %0, align 4 + br label %if.end + +if.end: ; preds = %if.then, %land.lhs.true, %for.body + %inc = add nuw nsw i64 %i.012, 1 + %exitcond.not = icmp eq i64 %inc, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %if.end + ret void +} </cut>

4 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_apm/llvm-master-aarch64-spec2k6-Oz_LTO - Build # 3 - Fixed!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_apm/llvm-master-aarch64-spec2k6-Oz_LTO. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_apm/llvm-master-aarch64-spec2k6-Oz_LTO Culprit: <cut> commit 4aafd5f00c2a772337ec065d4542ef158453a343 Author: Jan Svoboda <jan_svoboda(a)apple.com> Date: Fri Aug 6 14:46:41 2021 +0200 [clang] Remove misleading assertion in FullSourceLoc D31709 added an assertion was added to `FullSourceLoc::hasManager()` that ensured a valid `SourceLocation` is always paired with a `SourceManager`, and missing `SourceManager` is always paired with an invalid `SourceLocation`. This appears to be incorrect, since clients never cared about constructing `FullSourceLoc` to uphold that invariant, or always checking `isValid()` before calling `hasManager()`. The assertion started failing when serializing diagnostics pointing into an explicit module. Explicit modules don't have valid `SourceLocation` for the `import` statement, since they are "imported" from the command-line argument `-fmodule-name=x.pcm`. This patch removes the assertion, since `FullSourceLoc` was never intended to uphold any kind of invariants between the validity of `SourceLocation` and presence of `SourceManager`. Reviewed By: arphaman Differential Revision: https://reviews.llvm.org/D106862 </cut> Results regressed to (for first_bad == 4aafd5f00c2a772337ec065d4542ef158453a343) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -Oz_LTO artifacts/build-4aafd5f00c2a772337ec065d4542ef158453a343/results_id: 1 # 470.lbm,lbm_base.default regressed by 104 from (for last_good == 3709822d2602b8b7db2d9bcc0e856f676582f25d) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -Oz_LTO artifacts/build-3709822d2602b8b7db2d9bcc0e856f676582f25d/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Results ID of last_good: apm_64/tcwg_bmk_llvm_apm/bisect-llvm-master-aarch64-spec2k6-Oz_LTO/3746 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Results ID of first_bad: apm_64/tcwg_bmk_llvm_apm/bisect-llvm-master-aarch64-spec2k6-Oz_LTO/3725 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-4aafd5f00c2a772337ec065d4542ef158453a343 cd investigate-llvm-4aafd5f00c2a772337ec065d4542ef158453a343 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach 4aafd5f00c2a772337ec065d4542ef158453a343 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 3709822d2602b8b7db2d9bcc0e856f676582f25d ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Full commit (up to 1000 lines): <cut> commit 4aafd5f00c2a772337ec065d4542ef158453a343 Author: Jan Svoboda <jan_svoboda(a)apple.com> Date: Fri Aug 6 14:46:41 2021 +0200 [clang] Remove misleading assertion in FullSourceLoc D31709 added an assertion was added to `FullSourceLoc::hasManager()` that ensured a valid `SourceLocation` is always paired with a `SourceManager`, and missing `SourceManager` is always paired with an invalid `SourceLocation`. This appears to be incorrect, since clients never cared about constructing `FullSourceLoc` to uphold that invariant, or always checking `isValid()` before calling `hasManager()`. The assertion started failing when serializing diagnostics pointing into an explicit module. Explicit modules don't have valid `SourceLocation` for the `import` statement, since they are "imported" from the command-line argument `-fmodule-name=x.pcm`. This patch removes the assertion, since `FullSourceLoc` was never intended to uphold any kind of invariants between the validity of `SourceLocation` and presence of `SourceManager`. Reviewed By: arphaman Differential Revision: https://reviews.llvm.org/D106862 --- clang/include/clang/Basic/SourceLocation.h | 13 +++++++------ clang/test/Modules/Inputs/explicit-build-diags/a.h | 1 + .../Modules/Inputs/explicit-build-diags/module.modulemap | 1 + clang/test/Modules/explicit-build-diags.cpp | 8 ++++++++ 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/Basic/SourceLocation.h b/clang/include/clang/Basic/SourceLocation.h index 540de23b9f55..ba2e9156a2b1 100644 --- a/clang/include/clang/Basic/SourceLocation.h +++ b/clang/include/clang/Basic/SourceLocation.h @@ -363,6 +363,10 @@ class FileEntry; /// A SourceLocation and its associated SourceManager. /// /// This is useful for argument passing to functions that expect both objects. +/// +/// This class does not guarantee the presence of either the SourceManager or +/// a valid SourceLocation. Clients should use `isValid()` and `hasManager()` +/// before calling the member functions. class FullSourceLoc : public SourceLocation { const SourceManager *SrcMgr = nullptr; @@ -373,13 +377,10 @@ public: explicit FullSourceLoc(SourceLocation Loc, const SourceManager &SM) : SourceLocation(Loc), SrcMgr(&SM) {} - bool hasManager() const { - bool hasSrcMgr = SrcMgr != nullptr; - assert(hasSrcMgr == isValid() && "FullSourceLoc has location but no manager"); - return hasSrcMgr; - } + /// Checks whether the SourceManager is present. + bool hasManager() const { return SrcMgr != nullptr; } - /// \pre This FullSourceLoc has an associated SourceManager. + /// \pre hasManager() const SourceManager &getManager() const { assert(SrcMgr && "SourceManager is NULL."); return *SrcMgr; diff --git a/clang/test/Modules/Inputs/explicit-build-diags/a.h b/clang/test/Modules/Inputs/explicit-build-diags/a.h new file mode 100644 index 000000000000..486941dde83b --- /dev/null +++ b/clang/test/Modules/Inputs/explicit-build-diags/a.h @@ -0,0 +1 @@ +void a() __attribute__((deprecated)); diff --git a/clang/test/Modules/Inputs/explicit-build-diags/module.modulemap b/clang/test/Modules/Inputs/explicit-build-diags/module.modulemap new file mode 100644 index 000000000000..bb00c840ce39 --- /dev/null +++ b/clang/test/Modules/Inputs/explicit-build-diags/module.modulemap @@ -0,0 +1 @@ +module a { header "a.h" } diff --git a/clang/test/Modules/explicit-build-diags.cpp b/clang/test/Modules/explicit-build-diags.cpp new file mode 100644 index 000000000000..4a37dc108a68 --- /dev/null +++ b/clang/test/Modules/explicit-build-diags.cpp @@ -0,0 +1,8 @@ +// RUN: rm -rf %t && mkdir %t +// RUN: %clang_cc1 -fmodules -x c++ %S/Inputs/explicit-build-diags/module.modulemap -fmodule-name=a -emit-module -o %t/a.pcm +// RUN: %clang_cc1 -fmodules -Wdeprecated-declarations -fdiagnostics-show-note-include-stack -serialize-diagnostic-file %t/tu.dia \ +// RUN: -I %S/Inputs/explicit-build-diags -fmodule-file=%t/a.pcm -fsyntax-only %s + +#include "a.h" + +void foo() { a(); } </cut>

4 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tk1/llvm-release-arm-spec2k6-O2_LTO - Build # 9 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O2_LTO. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O2_LTO Culprit: <cut> commit d8c373815d35df1b8544784ce172ade68fb01f8f Author: Vladislav Vinogradov <vlad.vinogradov(a)intel.com> Date: Tue Feb 2 18:26:05 2021 +0000 [mlir][NFC] Add missing include guards to MlirOptMain.h Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D95533 </cut> Results regressed to (for first_bad == d8c373815d35df1b8544784ce172ade68fb01f8f) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2_LTO_marm artifacts/build-d8c373815d35df1b8544784ce172ade68fb01f8f/results_id: 1 # 464.h264ref,libc.so.6 regressed by 120 from (for last_good == f1bdf9fa9bc5edc616842b6cb9028b7d207e012c) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2_LTO_marm artifacts/build-f1bdf9fa9bc5edc616842b6cb9028b7d207e012c/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of last_good: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O2_LTO/3730 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of first_bad: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O2_LTO/3737 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-d8c373815d35df1b8544784ce172ade68fb01f8f cd investigate-llvm-d8c373815d35df1b8544784ce172ade68fb01f8f git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach d8c373815d35df1b8544784ce172ade68fb01f8f ../artifacts/test.sh # Reproduce last_good build git checkout --detach f1bdf9fa9bc5edc616842b6cb9028b7d207e012c ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Full commit (up to 1000 lines): <cut> commit d8c373815d35df1b8544784ce172ade68fb01f8f Author: Vladislav Vinogradov <vlad.vinogradov(a)intel.com> Date: Tue Feb 2 18:26:05 2021 +0000 [mlir][NFC] Add missing include guards to MlirOptMain.h Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D95533 --- mlir/include/mlir/Support/MlirOptMain.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mlir/include/mlir/Support/MlirOptMain.h b/mlir/include/mlir/Support/MlirOptMain.h index da03baed2ae7..71d47317571e 100644 --- a/mlir/include/mlir/Support/MlirOptMain.h +++ b/mlir/include/mlir/Support/MlirOptMain.h @@ -10,6 +10,9 @@ // //===----------------------------------------------------------------------===// +#ifndef MLIR_SUPPORT_MLIROPTMAIN_H +#define MLIR_SUPPORT_MLIROPTMAIN_H + #include "mlir/Support/LogicalResult.h" #include "llvm/ADT/StringRef.h" @@ -59,3 +62,5 @@ LogicalResult MlirOptMain(int argc, char **argv, llvm::StringRef toolName, bool preloadDialectsInContext = true); } // end namespace mlir + +#endif // MLIR_SUPPORT_MLIROPTMAIN_H </cut>

4 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_apm/llvm-master-aarch64-spec2k6-Oz - Build # 5 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *gcc* in CI configuration tcwg_bmk_llvm_apm/llvm-master-aarch64-spec2k6-Oz. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_apm/llvm-master-aarch64-spec2k6-Oz Culprit: <cut> commit fd26ce83981c6b50519805500272ab26b4e4c4b0 Author: Jeff Law <jlaw(a)localhost.localdomain> Date: Sun Aug 8 11:20:41 2021 -0400 Fix c6x test compromised by recent improvements to bswap & rotates gcc/testsuite * gcc.target/tic6x/rotdi16-scan.c: Pull rotate into its own function. </cut> Results regressed to (for first_bad == fd26ce83981c6b50519805500272ab26b4e4c4b0) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -Oz artifacts/build-fd26ce83981c6b50519805500272ab26b4e4c4b0/results_id: 1 # 482.sphinx3,[.] OUTLINED_FUNCTION_4 regressed by 117 from (for last_good == e9b639c4b532212ca92b2261f820768993770daa) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -Oz artifacts/build-e9b639c4b532212ca92b2261f820768993770daa/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Results ID of last_good: apm_64/tcwg_bmk_llvm_apm/bisect-llvm-master-aarch64-spec2k6-Oz/3651 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Results ID of first_bad: apm_64/tcwg_bmk_llvm_apm/bisect-llvm-master-aarch64-spec2k6-Oz/3643 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Configuration details: Reproduce builds: <cut> mkdir investigate-gcc-fd26ce83981c6b50519805500272ab26b4e4c4b0 cd investigate-gcc-fd26ce83981c6b50519805500272ab26b4e4c4b0 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /gcc/ ./ ./bisect/baseline/ cd gcc # Reproduce first_bad build git checkout --detach fd26ce83981c6b50519805500272ab26b4e4c4b0 ../artifacts/test.sh # Reproduce last_good build git checkout --detach e9b639c4b532212ca92b2261f820768993770daa ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Full commit (up to 1000 lines): <cut> commit fd26ce83981c6b50519805500272ab26b4e4c4b0 Author: Jeff Law <jlaw(a)localhost.localdomain> Date: Sun Aug 8 11:20:41 2021 -0400 Fix c6x test compromised by recent improvements to bswap & rotates gcc/testsuite * gcc.target/tic6x/rotdi16-scan.c: Pull rotate into its own function. --- gcc/testsuite/gcc.target/tic6x/rotdi16-scan.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gcc.target/tic6x/rotdi16-scan.c b/gcc/testsuite/gcc.target/tic6x/rotdi16-scan.c index 4d7816c1537..550418324e6 100644 --- a/gcc/testsuite/gcc.target/tic6x/rotdi16-scan.c +++ b/gcc/testsuite/gcc.target/tic6x/rotdi16-scan.c @@ -7,10 +7,14 @@ unsigned long long z = 0x012389ab4567cdefull; +unsigned long long __attribute__ ((noinline,noclone,noipa)) bar () +{ + return (z << 48) | (z >> 16); +} + int main () { - unsigned long long z2 = (z << 48) | (z >> 16); - if (z2 != 0xcdef012389ab4567ull) + if (bar() != 0xcdef012389ab4567ull) abort (); exit (0); } </cut>

4 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tk1/llvm-release-arm-spec2k6-O3 - Build # 8 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O3. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O3 Culprit: <cut> commit e771614bae0a05585f720812d5936a0b81dcddf0 Author: David Green <david.green(a)arm.com> Date: Thu Feb 11 11:58:55 2021 +0000 [ARM] Change getScalarizationOverhead overload used in gather costs. NFC This changes which of the getScalarizationOverhead overloads is used in the gather/scatter cost to use the base variant directly, not relying on the version using heuristics on the number of args with no args provided. It should still produce the same costs for scalarized gathers/scatters. </cut> Results regressed to (for first_bad == e771614bae0a05585f720812d5936a0b81dcddf0) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_marm artifacts/build-e771614bae0a05585f720812d5936a0b81dcddf0/results_id: 1 # 445.gobmk,[.] fastlib regressed by 115 from (for last_good == a31eae840525e9292a3a42c1fdac3fc594f42949) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_marm artifacts/build-a31eae840525e9292a3a42c1fdac3fc594f42949/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of last_good: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O3/3644 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of first_bad: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O3/3642 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-e771614bae0a05585f720812d5936a0b81dcddf0 cd investigate-llvm-e771614bae0a05585f720812d5936a0b81dcddf0 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach e771614bae0a05585f720812d5936a0b81dcddf0 ../artifacts/test.sh # Reproduce last_good build git checkout --detach a31eae840525e9292a3a42c1fdac3fc594f42949 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Full commit (up to 1000 lines): <cut> commit e771614bae0a05585f720812d5936a0b81dcddf0 Author: David Green <david.green(a)arm.com> Date: Thu Feb 11 11:58:55 2021 +0000 [ARM] Change getScalarizationOverhead overload used in gather costs. NFC This changes which of the getScalarizationOverhead overloads is used in the gather/scatter cost to use the base variant directly, not relying on the version using heuristics on the number of args with no args provided. It should still produce the same costs for scalarized gathers/scatters. --- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index af67839c2d75..de2c0607d2ed 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1416,8 +1416,9 @@ unsigned ARMTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy, unsigned VectorCost = NumElems * LT.first * ST->getMVEVectorCostFactor(); // The scalarization cost should be a lot higher. We use the number of vector // elements plus the scalarization overhead. - unsigned ScalarCost = - NumElems * LT.first + BaseT::getScalarizationOverhead(VTy, {}); + unsigned ScalarCost = NumElems * LT.first + + BaseT::getScalarizationOverhead(VTy, true, false) + + BaseT::getScalarizationOverhead(VTy, false, true); if (EltSize < 8 || Alignment < EltSize / 8) return ScalarCost; </cut>

4 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tx1/llvm-master-aarch64-spec2k6-O3 - Build # 19 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O3. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O3 Culprit: <cut> commit 4389a413e2129d7d55ee779638b649aa852b6f8a Author: Zahira Ammarguellat <zahira.ammarguellat(a)intel.com> Date: Fri Aug 6 12:01:47 2021 -0700 Revert "[clang][fpenv][patch] Change clang option -ffp-model=precise to select ffp-contract=on" This reverts commit 48ad446a0fb2c9b98cb7047e4daf8a84c29cef8f. </cut> Results regressed to (for first_bad == 4389a413e2129d7d55ee779638b649aa852b6f8a) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3 artifacts/build-4389a413e2129d7d55ee779638b649aa852b6f8a/results_id: 1 # 470.lbm,lbm_base.default regressed by 108 # 447.dealII,dealII_base.default regressed by 104 # 447.dealII,[.] _ZNK12SparseMatrixIdE5vmultI6VectorIdES3_EEvRT regressed by 120 # 444.namd,namd_base.default regressed by 104 # 400.perlbench,perlbench_base.default regressed by 103 from (for last_good == dfce2909ee1ea1523ec27b834a0e56429e9c2beb) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3 artifacts/build-dfce2909ee1ea1523ec27b834a0e56429e9c2beb/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Results ID of last_good: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O3/3640 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Results ID of first_bad: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O3/3621 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-4389a413e2129d7d55ee779638b649aa852b6f8a cd investigate-llvm-4389a413e2129d7d55ee779638b649aa852b6f8a git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach 4389a413e2129d7d55ee779638b649aa852b6f8a ../artifacts/test.sh # Reproduce last_good build git checkout --detach dfce2909ee1ea1523ec27b834a0e56429e9c2beb ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Full commit (up to 1000 lines): <cut> commit 4389a413e2129d7d55ee779638b649aa852b6f8a Author: Zahira Ammarguellat <zahira.ammarguellat(a)intel.com> Date: Fri Aug 6 12:01:47 2021 -0700 Revert "[clang][fpenv][patch] Change clang option -ffp-model=precise to select ffp-contract=on" This reverts commit 48ad446a0fb2c9b98cb7047e4daf8a84c29cef8f. --- clang/docs/UsersManual.rst | 48 ++----------------------- clang/lib/Driver/ToolChains/Clang.cpp | 33 ++++++++--------- clang/test/CodeGen/ffp-contract-option.c | 47 +++--------------------- clang/test/CodeGen/ppc-emmintrin.c | 4 +-- clang/test/CodeGen/ppc-xmmintrin.c | 4 +-- clang/test/Driver/fp-model.c | 61 +++++++++++++++----------------- 6 files changed, 58 insertions(+), 139 deletions(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 838669794ea8..980d0ab45975 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1260,50 +1260,8 @@ installed. Controlling Floating Point Behavior ----------------------------------- -Clang provides a number of ways to control floating point behavior, including -with command line options and source pragmas. This section -describes the various floating point semantic modes and the corresponding options. - -.. csv-table:: Floating Point Semantic Modes - :header: "Mode", "Values" - :widths: 15, 30, 30 - - "except_behavior", "{ignore, strict, may_trap}", "ffp-exception-behavior" - "fenv_access", "{off, on}", "(none)" - "rounding_mode", "{dynamic, tonearest, downward, upward, towardzero}", "frounding-math" - "contract", "{on, off, fast}", "ffp-contract" - "denormal_fp_math", "{IEEE, PreserveSign, PositiveZero}", "fdenormal-fp-math" - "denormal_fp32_math", "{IEEE, PreserveSign, PositiveZero}", "fdenormal-fp-math-fp32" - "support_math_errno", "{on, off}", "fmath-errno" - "no_honor_nans", "{on, off}", "fhonor-nans" - "no_honor_infinities", "{on, off}", "fhonor-infinities" - "no_signed_zeros", "{on, off}", "fsigned-zeros" - "allow_reciprocal", "{on, off}", "freciprocal-math" - "allow_approximate_fns", "{on, off}", "(none)" - "allow_reassociation", "{on, off}", "fassociative-math" - - -This table describes the option settings that correspond to the three -floating point semantic models: precise (the default), strict, and fast. - - -.. csv-table:: Floating Point Models - :header: "Mode", "Precise", "Strict", "Fast" - :widths: 25, 15, 15, 15 - - "except_behavior", "ignore", "strict", "ignore" - "fenv_access", "off", "on", "off" - "rounding_mode", "tonearest", "dynamic", "tonearest" - "contract", "on", "off", "fast" - "denormal_fp_math", "IEEE", "IEEE", "PreserveSign" - "denormal_fp32_math", "IEEE","IEEE", "PreserveSign" - "support_math_errno", "on", "on", "off" - "no_honor_nans", "off", "off", "on" - "no_honor_infinities", "off", "off", "on" - "no_signed_zeros", "off", "off", "on" - "allow_reciprocal", "off", "off", "on" - "allow_approximate_fns", "off", "off", "on" - "allow_reassociation", "off", "off", "on" +Clang provides a number of ways to control floating point behavior. The options +are listed below. .. option:: -ffast-math @@ -1498,7 +1456,7 @@ Note that floating-point operations performed as part of constant initialization and ``fast``. Details: - * ``precise`` Disables optimizations that are not value-safe on floating-point data, although FP contraction (FMA) is enabled (``-ffp-contract=on``). This is the default behavior. + * ``precise`` Disables optimizations that are not value-safe on floating-point data, although FP contraction (FMA) is enabled (``-ffp-contract=fast``). This is the default behavior. * ``strict`` Enables ``-frounding-math`` and ``-ffp-exception-behavior=strict``, and disables contractions (FMA). All of the ``-ffast-math`` enablements are disabled. Enables ``STDC FENV_ACCESS``: by default ``FENV_ACCESS`` is disabled. This option setting behaves as though ``#pragma STDC FENV_ACESS ON`` appeared at the top of the source file. * ``fast`` Behaves identically to specifying both ``-ffast-math`` and ``ffp-contract=fast`` diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 1c79640be80f..96bbc0250126 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2641,7 +2641,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, llvm::DenormalMode DenormalFPMath = DefaultDenormalFPMath; llvm::DenormalMode DenormalFP32Math = DefaultDenormalFP32Math; - StringRef FPContract = "on"; + StringRef FPContract = ""; bool StrictFPModel = false; @@ -2666,7 +2666,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, ReciprocalMath = false; SignedZeros = true; // -fno_fast_math restores default denormal and fpcontract handling - FPContract = "on"; + FPContract = ""; DenormalFPMath = llvm::DenormalMode::getIEEE(); // FIXME: The target may have picked a non-IEEE default mode here based on @@ -2686,18 +2686,20 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, // ffp-model= is a Driver option, it is entirely rewritten into more // granular options before being passed into cc1. // Use the gcc option in the switch below. - if (!FPModel.empty() && !FPModel.equals(Val)) + if (!FPModel.empty() && !FPModel.equals(Val)) { D.Diag(clang::diag::warn_drv_overriding_flag_option) << Args.MakeArgString("-ffp-model=" + FPModel) << Args.MakeArgString("-ffp-model=" + Val); + FPContract = ""; + } if (Val.equals("fast")) { optID = options::OPT_ffast_math; FPModel = Val; - FPContract = Val; + FPContract = "fast"; } else if (Val.equals("precise")) { optID = options::OPT_ffp_contract; FPModel = Val; - FPContract = "on"; + FPContract = "fast"; PreciseFPModel = true; } else if (Val.equals("strict")) { StrictFPModel = true; @@ -2783,11 +2785,9 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, case options::OPT_ffp_contract: { StringRef Val = A->getValue(); if (PreciseFPModel) { - // When -ffp-model=precise is seen on the command line, - // the boolean PreciseFPModel is set to true which indicates - // "the current option is actually PreciseFPModel". The optID - // is changed to OPT_ffp_contract and FPContract is set to "on". - // the argument Val string is "precise": it shouldn't be checked. + // -ffp-model=precise enables ffp-contract=fast as a side effect + // the FPContract value has already been set to a string literal + // and the Val string isn't a pertinent value. ; } else if (Val.equals("fast") || Val.equals("on") || Val.equals("off")) FPContract = Val; @@ -2897,17 +2897,18 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, // -fno_fast_math restores default denormal and fpcontract handling DenormalFPMath = DefaultDenormalFPMath; DenormalFP32Math = llvm::DenormalMode::getIEEE(); - FPContract = "on"; + FPContract = ""; break; } if (StrictFPModel) { // If -ffp-model=strict has been specified on command line but // subsequent options conflict then emit warning diagnostic. - if (HonorINFs && HonorNaNs && !AssociativeMath && !ReciprocalMath && - SignedZeros && TrappingMath && RoundingFPMath && - DenormalFPMath == llvm::DenormalMode::getIEEE() && - DenormalFP32Math == llvm::DenormalMode::getIEEE() && - FPContract.equals("off")) + if (HonorINFs && HonorNaNs && + !AssociativeMath && !ReciprocalMath && + SignedZeros && TrappingMath && RoundingFPMath && + (FPContract.equals("off") || FPContract.empty()) && + DenormalFPMath == llvm::DenormalMode::getIEEE() && + DenormalFP32Math == llvm::DenormalMode::getIEEE()) // OK: Current Arg doesn't conflict with -ffp-model=strict ; else { diff --git a/clang/test/CodeGen/ffp-contract-option.c b/clang/test/CodeGen/ffp-contract-option.c index efc72c2b5461..52b750795940 100644 --- a/clang/test/CodeGen/ffp-contract-option.c +++ b/clang/test/CodeGen/ffp-contract-option.c @@ -1,46 +1,9 @@ -// RUN: %clang_cc1 -O3 -ffp-contract=fast -triple=aarch64-apple-darwin -S -o - %s | FileCheck --check-prefix=CHECK-FMADD %s +// RUN: %clang_cc1 -O3 -ffp-contract=fast -triple=aarch64-apple-darwin -S -o - %s | FileCheck %s // REQUIRES: aarch64-registered-target float fma_test1(float a, float b, float c) { -// CHECK-FMADD: fmadd - float x = a * b; - float y = x + c; - return y; -} - -// RUN: %clang_cc1 -triple=x86_64 %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-DEFAULT %s -// -// RUN: %clang_cc1 -triple=x86_64 -ffp-contract=off %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-DEFAULT %s -// RUN: %clang_cc1 -triple=x86_64 -ffp-contract=on %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-ON %s -// RUN: %clang_cc1 -triple=x86_64 -ffp-contract=fast %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-CONTRACTFAST %s -// -// RUN: %clang_cc1 -triple=x86_64 -ffast-math %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-DEFAULTFAST %s -// RUN: %clang_cc1 -triple=x86_64 -ffast-math -ffp-contract=off %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-DEFAULTFAST %s -// RUN: %clang_cc1 -triple=x86_64 -ffast-math -ffp-contract=on %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-ONFAST %s -// RUN: %clang_cc1 -triple=x86_64 -ffast-math -ffp-contract=fast %s -emit-llvm -o - \ -// RUN:| FileCheck --check-prefix=CHECK-FASTFAST %s -float mymuladd( float x, float y, float z ) { - return x * y + z; - // CHECK-DEFAULT: = fmul float - // CHECK-DEFAULT: = fadd float - - // CHECK-ON: = call float @llvm.fmuladd.f32 - - // CHECK-CONTRACTFAST: = fmul contract float - // CHECK-CONTRACTFAST: = fadd contract float - - // CHECK-DEFAULTFAST: = fmul reassoc nnan ninf nsz arcp afn float - // CHECK-DEFAULTFAST: = fadd reassoc nnan ninf nsz arcp afn float - - // CHECK-ONFAST: = call reassoc nnan ninf nsz arcp afn float @llvm.fmuladd.f32 - - // CHECK-FASTFAST: = fmul fast float - // CHECK-FASTFAST: = fadd fast float +// CHECK: fmadd + float x = a * b; + float y = x + c; + return y; } diff --git a/clang/test/CodeGen/ppc-emmintrin.c b/clang/test/CodeGen/ppc-emmintrin.c index 4a246ff92d76..fa3801f50a01 100644 --- a/clang/test/CodeGen/ppc-emmintrin.c +++ b/clang/test/CodeGen/ppc-emmintrin.c @@ -2,9 +2,9 @@ // REQUIRES: powerpc-registered-target // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ -// RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ -// RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE // CHECK-BE-DAG: @_mm_movemask_pd.perm_mask = internal constant <4 x i32> <i32 -2139062144, i32 -2139062144, i32 -2139062144, i32 -2139078656>, align 16 // CHECK-BE-DAG: @_mm_shuffle_epi32.permute_selectors = internal constant [4 x i32] [i32 66051, i32 67438087, i32 134810123, i32 202182159], align 4 diff --git a/clang/test/CodeGen/ppc-xmmintrin.c b/clang/test/CodeGen/ppc-xmmintrin.c index a7f6ed6e0e67..d3f18bfbb1e5 100644 --- a/clang/test/CodeGen/ppc-xmmintrin.c +++ b/clang/test/CodeGen/ppc-xmmintrin.c @@ -2,11 +2,11 @@ // REQUIRES: powerpc-registered-target // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ -// RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE // RUN: %clang -x c++ -fsyntax-only -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ -// RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE // RUN: %clang -x c++ -fsyntax-only -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns diff --git a/clang/test/Driver/fp-model.c b/clang/test/Driver/fp-model.c index c6d683e25c0b..5fa9d110dd83 100644 --- a/clang/test/Driver/fp-model.c +++ b/clang/test/Driver/fp-model.c @@ -1,90 +1,88 @@ // Test that incompatible combinations of -ffp-model= options // and other floating point options get a warning diagnostic. +// +// REQUIRES: clang-driver -// RUN: %clang -target x86_64 -### -ffp-model=fast -ffp-contract=off -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=fast -ffp-contract=off -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN %s // WARN: warning: overriding '-ffp-model=fast' option with '-ffp-contract=off' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=fast -ffp-contract=on -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=fast -ffp-contract=on -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN1 %s // WARN1: warning: overriding '-ffp-model=fast' option with '-ffp-contract=on' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fassociative-math -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fassociative-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN2 %s // WARN2: warning: overriding '-ffp-model=strict' option with '-fassociative-math' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -ffast-math -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -ffast-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN3 %s // WARN3: warning: overriding '-ffp-model=strict' option with '-ffast-math' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -ffinite-math-only -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -ffinite-math-only -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN4 %s // WARN4: warning: overriding '-ffp-model=strict' option with '-ffinite-math-only' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -ffp-contract=fast -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -ffp-contract=fast -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN5 %s // WARN5: warning: overriding '-ffp-model=strict' option with '-ffp-contract=fast' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -ffp-contract=fast -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=WARN6 %s -// WARN6: warning: overriding '-ffp-model=strict' option with '-ffp-contract=fast' [-Woverriding-t-option] - -// RUN: %clang -target x86_64 -### -ffp-model=strict -ffp-contract=on -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -ffp-contract=on -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN7 %s // WARN7: warning: overriding '-ffp-model=strict' option with '-ffp-contract=on' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fno-honor-infinities -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fno-honor-infinities -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN8 %s // WARN8: warning: overriding '-ffp-model=strict' option with '-fno-honor-infinities' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fno-honor-nans -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fno-honor-nans -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN9 %s // WARN9: warning: overriding '-ffp-model=strict' option with '-fno-honor-nans' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fno-rounding-math -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fno-rounding-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARNa %s // WARNa: warning: overriding '-ffp-model=strict' option with '-fno-rounding-math' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fno-signed-zeros -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fno-signed-zeros -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARNb %s // WARNb: warning: overriding '-ffp-model=strict' option with '-fno-signed-zeros' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fno-trapping-math -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fno-trapping-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARNc %s // WARNc: warning: overriding '-ffp-model=strict' option with '-fno-trapping-math' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -freciprocal-math -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -freciprocal-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARNd %s // WARNd: warning: overriding '-ffp-model=strict' option with '-freciprocal-math' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -funsafe-math-optimizations -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -funsafe-math-optimizations -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARNe %s // WARNe: warning: overriding '-ffp-model=strict' option with '-funsafe-math-optimizations' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -Ofast -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -Ofast -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARNf %s // WARNf: warning: overriding '-ffp-model=strict' option with '-Ofast' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -ffp-model=strict -fdenormal-fp-math=preserve-sign,preserve-sign -c %s 2>&1 \ +// RUN: %clang -### -ffp-model=strict -fdenormal-fp-math=preserve-sign,preserve-sign -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN10 %s // WARN10: warning: overriding '-ffp-model=strict' option with '-fdenormal-fp-math=preserve-sign,preserve-sign' [-Woverriding-t-option] -// RUN: %clang -target x86_64 -### -c %s 2>&1 \ +// RUN: %clang -### -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-NOROUND %s // CHECK-NOROUND: "-cc1" // CHECK-NOROUND: "-fno-rounding-math" -// RUN: %clang -target x86_64 -### -frounding-math -c %s 2>&1 \ +// RUN: %clang -### -frounding-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-ROUND --implicit-check-not ffp-exception-behavior=strict %s // CHECK-ROUND: "-cc1" // CHECK-ROUND: "-frounding-math" -// RUN: %clang -target x86_64 -### -ftrapping-math -c %s 2>&1 \ +// RUN: %clang -### -ftrapping-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-TRAP %s // CHECK-TRAP: "-cc1" // CHECK-TRAP: "-ffp-exception-behavior=strict" -// RUN: %clang -target x86_64 -### -nostdinc -ffp-model=fast -c %s 2>&1 \ +// RUN: %clang -### -nostdinc -ffp-model=fast -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FPM-FAST %s // CHECK-FPM-FAST: "-cc1" // CHECK-FPM-FAST: "-menable-no-infs" @@ -98,35 +96,34 @@ // CHECK-FPM-FAST: "-ffast-math" // CHECK-FPM-FAST: "-ffinite-math-only" -// RUN: %clang -target x86_64 -### -nostdinc -ffp-model=precise -c %s 2>&1 \ +// RUN: %clang -### -nostdinc -ffp-model=precise -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FPM-PRECISE %s // CHECK-FPM-PRECISE: "-cc1" -// CHECK-FPM-PRECISE: "-ffp-contract=on" +// CHECK-FPM-PRECISE: "-ffp-contract=fast" // CHECK-FPM-PRECISE: "-fno-rounding-math" -// RUN: %clang -target x86_64 -### -nostdinc -ffp-model=strict -c %s 2>&1 \ +// RUN: %clang -### -nostdinc -ffp-model=strict -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FPM-STRICT %s // CHECK-FPM-STRICT: "-cc1" -// CHECK-FPM-STRICT: "-fmath-errno" -// CHECK-FPM-STRICT: "-ffp-contract=off" // CHECK-FPM-STRICT: "-frounding-math" // CHECK-FPM-STRICT: "-ffp-exception-behavior=strict" -// RUN: %clang -target x86_64 -### -nostdinc -ffp-exception-behavior=strict -c %s 2>&1 \ +// RUN: %clang -### -nostdinc -ffp-exception-behavior=strict -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FEB-STRICT %s // CHECK-FEB-STRICT: "-cc1" // CHECK-FEB-STRICT: "-fno-rounding-math" // CHECK-FEB-STRICT: "-ffp-exception-behavior=strict" -// RUN: %clang -target x86_64 -### -nostdinc -ffp-exception-behavior=maytrap -c %s 2>&1 \ +// RUN: %clang -### -nostdinc -ffp-exception-behavior=maytrap -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FEB-MAYTRAP %s // CHECK-FEB-MAYTRAP: "-cc1" // CHECK-FEB-MAYTRAP: "-fno-rounding-math" // CHECK-FEB-MAYTRAP: "-ffp-exception-behavior=maytrap" -// RUN: %clang -target x86_64 -### -nostdinc -ffp-exception-behavior=ignore -c %s 2>&1 \ +// RUN: %clang -### -nostdinc -ffp-exception-behavior=ignore -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FEB-IGNORE %s // CHECK-FEB-IGNORE: "-cc1" // CHECK-FEB-IGNORE: "-fno-rounding-math" // CHECK-FEB-IGNORE: "-ffp-exception-behavior=ignore" + </cut>

4 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tx1/llvm-release-aarch64-spec2k6-O3_LTO - Build # 5 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tx1/llvm-release-aarch64-spec2k6-O3_LTO. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tx1/llvm-release-aarch64-spec2k6-O3_LTO Culprit: <cut> commit 669ddd1e9b1226432b003dbba05b99f8e992285b Author: Arthur Eubanks <aeubanks(a)google.com> Date: Mon Jan 25 11:00:56 2021 -0800 Turn on the new pass manager by default This turns on the new pass manager by default for the optimization pipeline in Clang and ThinLTO in various LLD backends. This also makes uses of `opt -instcombine` use the new pass manager (unless specifically opted out). This does not affect the backend target-dependent codegen pipeline. If this causes regressions, you can opt out of the new pass manager either via the -DENABLE_EXPERIMENTAL_NEW_PASS_MANAGER=OFF CMake flag while building LLVM, or via various compiler flags, e.g. -flegacy-pass-manager for Clang or -Wl,--lto-legacy-pass-manager for ELF LLD. Please file bugs for any regressions. Major differences: * The inliner works slightly differently * -O1 does some amount of inlining * LCSSA and LoopSimplify are run before all loop passes * Loop unswitching is implemented slightly differently * A new SpeculateAroundPHIs pass is added to the pipeline https://lists.llvm.org/pipermail/llvm-dev/2021-January/148098.html Reviewed By: asbirlea, ychen, MaskRay, echristo Differential Revision: https://reviews.llvm.org/D95380 </cut> Results regressed to (for first_bad == 669ddd1e9b1226432b003dbba05b99f8e992285b) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_LTO artifacts/build-669ddd1e9b1226432b003dbba05b99f8e992285b/results_id: 1 # 473.astar,astar_base.default regressed by 106 from (for last_good == b15cbaf5a03d0b32dbc32c37766e32ccf66e6c87) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3_LTO artifacts/build-b15cbaf5a03d0b32dbc32c37766e32ccf66e6c87/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Results ID of last_good: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-release-aarch64-spec2k6-O3_LTO/3543 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Results ID of first_bad: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-release-aarch64-spec2k6-O3_LTO/3539 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-669ddd1e9b1226432b003dbba05b99f8e992285b cd investigate-llvm-669ddd1e9b1226432b003dbba05b99f8e992285b git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach 669ddd1e9b1226432b003dbba05b99f8e992285b ../artifacts/test.sh # Reproduce last_good build git checkout --detach b15cbaf5a03d0b32dbc32c37766e32ccf66e6c87 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-release… Full commit (up to 1000 lines): <cut> commit 669ddd1e9b1226432b003dbba05b99f8e992285b Author: Arthur Eubanks <aeubanks(a)google.com> Date: Mon Jan 25 11:00:56 2021 -0800 Turn on the new pass manager by default This turns on the new pass manager by default for the optimization pipeline in Clang and ThinLTO in various LLD backends. This also makes uses of `opt -instcombine` use the new pass manager (unless specifically opted out). This does not affect the backend target-dependent codegen pipeline. If this causes regressions, you can opt out of the new pass manager either via the -DENABLE_EXPERIMENTAL_NEW_PASS_MANAGER=OFF CMake flag while building LLVM, or via various compiler flags, e.g. -flegacy-pass-manager for Clang or -Wl,--lto-legacy-pass-manager for ELF LLD. Please file bugs for any regressions. Major differences: * The inliner works slightly differently * -O1 does some amount of inlining * LCSSA and LoopSimplify are run before all loop passes * Loop unswitching is implemented slightly differently * A new SpeculateAroundPHIs pass is added to the pipeline https://lists.llvm.org/pipermail/llvm-dev/2021-January/148098.html Reviewed By: asbirlea, ychen, MaskRay, echristo Differential Revision: https://reviews.llvm.org/D95380 --- llvm/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 1affc289e64b..f5298de9f7ca 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -688,8 +688,8 @@ else() endif() option(LLVM_ENABLE_PLUGINS "Enable plugin support" ${LLVM_ENABLE_PLUGINS_default}) -set(ENABLE_EXPERIMENTAL_NEW_PASS_MANAGER FALSE CACHE BOOL - "Enable the experimental new pass manager by default.") +set(ENABLE_EXPERIMENTAL_NEW_PASS_MANAGER TRUE CACHE BOOL + "Enable the new pass manager by default.") include(HandleLLVMOptions) </cut>

4 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_apm/llvm-master-aarch64-spec2k6-Os_LTO - Build # 5 - Fixed!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_apm/llvm-master-aarch64-spec2k6-Os_LTO. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_apm/llvm-master-aarch64-spec2k6-Os_LTO Culprit: <cut> commit 0a241e90d4931a764109da70c2322a0a9386b1a7 Author: Roman Lebedev <lebedev.ri(a)gmail.com> Date: Sat Aug 7 15:59:21 2021 +0300 [NFC][InstCombine] `vector_reduce_xor(?ext(<n x i1>))` --> `?ext(vector_reduce_add(<n x i1>))` Instead of expanding it ourselves, we can just forward to `?ext(vector_reduce_add(<n x i1>))`, as per alive2: https://alive2.llvm.org/ce/z/ymz7zE (self) https://alive2.llvm.org/ce/z/eKu2v2 (skipped zext) https://alive2.llvm.org/ce/z/c3BXgc (skipped sext) </cut> Results regressed to (for first_bad == 0a241e90d4931a764109da70c2322a0a9386b1a7) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -Os_LTO artifacts/build-0a241e90d4931a764109da70c2322a0a9386b1a7/results_id: 1 # 433.milc,milc_base.default regressed by 102 # 453.povray,povray_base.default regressed by 102 # 470.lbm,lbm_base.default regressed by 103 from (for last_good == c6ff867f92f67c6451ed1709f2af66211cb7b901) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -Os_LTO artifacts/build-c6ff867f92f67c6451ed1709f2af66211cb7b901/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Results ID of last_good: apm_64/tcwg_bmk_llvm_apm/bisect-llvm-master-aarch64-spec2k6-Os_LTO/3523 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Results ID of first_bad: apm_64/tcwg_bmk_llvm_apm/bisect-llvm-master-aarch64-spec2k6-Os_LTO/3530 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-0a241e90d4931a764109da70c2322a0a9386b1a7 cd investigate-llvm-0a241e90d4931a764109da70c2322a0a9386b1a7 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach 0a241e90d4931a764109da70c2322a0a9386b1a7 ../artifacts/test.sh # Reproduce last_good build git checkout --detach c6ff867f92f67c6451ed1709f2af66211cb7b901 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Full commit (up to 1000 lines): <cut> commit 0a241e90d4931a764109da70c2322a0a9386b1a7 Author: Roman Lebedev <lebedev.ri(a)gmail.com> Date: Sat Aug 7 15:59:21 2021 +0300 [NFC][InstCombine] `vector_reduce_xor(?ext(<n x i1>))` --> `?ext(vector_reduce_add(<n x i1>))` Instead of expanding it ourselves, we can just forward to `?ext(vector_reduce_add(<n x i1>))`, as per alive2: https://alive2.llvm.org/ce/z/ymz7zE (self) https://alive2.llvm.org/ce/z/eKu2v2 (skipped zext) https://alive2.llvm.org/ce/z/c3BXgc (skipped sext) --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 2ba326d791c2..210652e23377 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2038,20 +2038,17 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { if (IID == Intrinsic::vector_reduce_xor) { // Exclusive disjunction reduction over the vector with // (potentially-extended) i1 element type is actually a - // (potentially-extended) parity check: + // (potentially-extended) arithmetic `add` reduction over the original + // non-extended value: // vector_reduce_xor(?ext(<n x i1>)) // --> - // ?ext(trunc(vector_reduce_and(<n x i1>) to i1)) + // ?ext(vector_reduce_add(<n x i1>)) Value *Arg = II->getArgOperand(0); Value *Vect; if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) { if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType())) if (FTy->getElementType() == Builder.getInt1Ty()) { - Value *V = Builder.CreateBitCast( - Vect, Builder.getIntNTy(FTy->getNumElements())); - Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V); - Res = Builder.CreateTrunc(Res, - IntegerType::get(Res->getContext(), 1)); + Value *Res = Builder.CreateAddReduce(Vect); if (Arg != Vect) Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res, II->getType()); </cut>

4 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tk1/llvm-release-arm-spec2k6-O2 - Build # 16 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O2. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tk1/llvm-release-arm-spec2k6-O2 Culprit: <cut> commit 7d15c4941499b1454432d63604dc6bbe56e32ce7 Author: Sanjay Patel <spatel(a)rotateright.com> Date: Tue May 18 14:02:11 2021 -0400 [x86] trim zeros from constants for readability; NFC </cut> Results regressed to (for first_bad == 7d15c4941499b1454432d63604dc6bbe56e32ce7) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2_marm artifacts/build-7d15c4941499b1454432d63604dc6bbe56e32ce7/results_id: 1 # 482.sphinx3,sphinx_livepretend_base.default regressed by 104 from (for last_good == 2257e4a70e4aabe7255161f3a54922d7dcf1c059) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2_marm artifacts/build-2257e4a70e4aabe7255161f3a54922d7dcf1c059/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of last_good: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O2/3446 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Results ID of first_bad: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-release-arm-spec2k6-O2/3510 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-7d15c4941499b1454432d63604dc6bbe56e32ce7 cd investigate-llvm-7d15c4941499b1454432d63604dc6bbe56e32ce7 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach 7d15c4941499b1454432d63604dc6bbe56e32ce7 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 2257e4a70e4aabe7255161f3a54922d7dcf1c059 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-release… Full commit (up to 1000 lines): <cut> commit 7d15c4941499b1454432d63604dc6bbe56e32ce7 Author: Sanjay Patel <spatel(a)rotateright.com> Date: Tue May 18 14:02:11 2021 -0400 [x86] trim zeros from constants for readability; NFC --- llvm/test/CodeGen/X86/fma-fneg-combine.ll | 58 +++++++++++++++---------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/llvm/test/CodeGen/X86/fma-fneg-combine.ll b/llvm/test/CodeGen/X86/fma-fneg-combine.ll index a67595c1e95a..9e47548f27f1 100644 --- a/llvm/test/CodeGen/X86/fma-fneg-combine.ll +++ b/llvm/test/CodeGen/X86/fma-fneg-combine.ll @@ -11,7 +11,7 @@ define <16 x float> @test1(<16 x float> %a, <16 x float> %b, <16 x float> %c) { ; CHECK-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2 ; CHECK-NEXT: retq entry: - %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c + %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %c %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 4) #2 ret <16 x float> %0 } @@ -28,7 +28,7 @@ define <16 x float> @test2(<16 x float> %a, <16 x float> %b, <16 x float> %c) { ; CHECK-NEXT: retq entry: %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i32 4) #2 - %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0 + %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %0 ret <16 x float> %sub.i } @@ -39,7 +39,7 @@ define <16 x float> @test3(<16 x float> %a, <16 x float> %b, <16 x float> %c) { ; CHECK-NEXT: retq entry: %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2 - %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0 + %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %0 ret <16 x float> %sub.i } @@ -50,7 +50,7 @@ define <16 x float> @test4(<16 x float> %a, <16 x float> %b, <16 x float> %c) { ; CHECK-NEXT: retq entry: %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2 - %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0 + %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %0 ret <16 x float> %sub.i } @@ -60,7 +60,7 @@ define <16 x float> @test5(<16 x float> %a, <16 x float> %b, <16 x float> %c) { ; CHECK-NEXT: vfmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0 ; CHECK-NEXT: retq entry: - %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c + %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %c %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 10) #2 ret <16 x float> %0 } @@ -72,7 +72,7 @@ define <16 x float> @test6(<16 x float> %a, <16 x float> %b, <16 x float> %c) { ; CHECK-NEXT: retq entry: %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 10) #2 - %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0 + %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %0 ret <16 x float> %sub.i } @@ -84,7 +84,7 @@ define <8 x float> @test7(<8 x float> %a, <8 x float> %b, <8 x float> %c) { ; CHECK-NEXT: retq entry: %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2 - %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0 + %sub.i = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %0 ret <8 x float> %sub.i } @@ -94,7 +94,7 @@ define <8 x float> @test8(<8 x float> %a, <8 x float> %b, <8 x float> %c) { ; CHECK-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 ; CHECK-NEXT: retq entry: - %sub.c = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c + %sub.c = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %c %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %sub.c) #2 ret <8 x float> %0 } @@ -109,7 +109,7 @@ define <8 x double> @test9(<8 x double> %a, <8 x double> %b, <8 x double> %c) { ; CHECK-NEXT: retq entry: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32 4) #2 - %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %0 + %sub.i = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %0 ret <8 x double> %sub.i } @@ -123,7 +123,7 @@ define <2 x double> @test10(<2 x double> %a, <2 x double> %b, <2 x double> %c) { ; CHECK-NEXT: retq entry: %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 -1, i32 4) #2 - %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %0 + %sub.i = fsub <2 x double> <double -0.0, double -0.0>, %0 ret <2 x double> %sub.i } @@ -149,7 +149,7 @@ define <4 x float> @test11(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 ze ; KNL-NEXT: vmovaps %xmm3, %xmm0 ; KNL-NEXT: retq entry: - %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c + %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %c %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 4) #10 ret <4 x float> %0 } @@ -169,7 +169,7 @@ define <4 x float> @test11b(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 z ; KNL-NEXT: vfmsub213ss {{.*#+}} xmm0 {%k1} = (xmm1 * xmm0) - xmm2 ; KNL-NEXT: retq entry: - %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c + %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %c %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 4) #10 ret <4 x float> %0 } @@ -194,7 +194,7 @@ entry: %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32 4) #2 %bc = bitcast i8 %mask to <8 x i1> %sel = select <8 x i1> %bc, <8 x double> %0, <8 x double> %a - %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %sel + %sub.i = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sel ret <8 x double> %sub.i } @@ -218,7 +218,7 @@ define <2 x double> @test13(<2 x double> %a, <2 x double> %b, <2 x double> %c, i ; KNL-NEXT: retq entry: - %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a + %sub.i = fsub <2 x double> <double -0.0, double -0.0>, %a %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4) ret <2 x double> %0 } @@ -239,7 +239,7 @@ define <16 x float> @test14(<16 x float> %a, <16 x float> %b, <16 x float> %c, i ; KNL-NEXT: retq entry: %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 10) #2 - %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0 + %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %0 ret <16 x float> %sub.i } @@ -265,7 +265,7 @@ define <16 x float> @test15(<16 x float> %a, <16 x float> %b, <16 x float> %c, i ; KNL-NEXT: retq entry: %bc = bitcast i16 %mask to <16 x i1> - %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a + %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %a %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub.i, <16 x float> %b, <16 x float> %c, i32 10) %sel = select <16 x i1> %bc, <16 x float> %0, <16 x float> %sub.i %1 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sel, <16 x float> %sub.i, <16 x float> %c, i32 9) @@ -285,7 +285,7 @@ define <16 x float> @test16(<16 x float> %a, <16 x float> %b, <16 x float> %c, i ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: vfmsubadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} ; KNL-NEXT: retq - %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c + %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %c %res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 9) %bc = bitcast i16 %mask to <16 x i1> %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a @@ -305,7 +305,7 @@ define <8 x double> @test17(<8 x double> %a, <8 x double> %b, <8 x double> %c, i ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: vfmsubadd132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) -/+ zmm2 ; KNL-NEXT: retq - %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c + %sub.i = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %c %res = call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %sub.i, i32 4) %bc = bitcast i8 %mask to <8 x i1> %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a @@ -326,7 +326,7 @@ define <4 x float> @test18(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 ze ; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) + xmm2 ; KNL-NEXT: retq entry: - %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b + %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %b %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 4) #10 ret <4 x float> %0 } @@ -344,8 +344,8 @@ define <4 x float> @test19(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 ze ; KNL-NEXT: vfnmsub213ss {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) - xmm2 ; KNL-NEXT: retq entry: - %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b - %sub.i.2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c + %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %b + %sub.i.2 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %c %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %sub.i.2, i8 %mask, i32 4) #10 ret <4 x float> %0 } @@ -365,7 +365,7 @@ define <4 x float> @test20(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 ze ; KNL-NEXT: vmovaps %xmm2, %xmm0 ; KNL-NEXT: retq entry: - %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b + %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %b %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 4) #10 ret <4 x float> %0 } @@ -383,7 +383,7 @@ define <4 x float> @test21(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 ze ; KNL-NEXT: vfnmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} ; KNL-NEXT: retq entry: - %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b + %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %b %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 8) #10 ret <4 x float> %0 } @@ -401,8 +401,8 @@ define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 ze ; KNL-NEXT: vfnmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} ; KNL-NEXT: retq entry: - %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b - %sub.i.2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c + %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %b + %sub.i.2 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %c %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %sub.i.2, i8 %mask, i32 8) #10 ret <4 x float> %0 } @@ -422,7 +422,7 @@ define <4 x float> @test23(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 ze ; KNL-NEXT: vmovaps %xmm2, %xmm0 ; KNL-NEXT: retq entry: - %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b + %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %b %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 8) #10 ret <4 x float> %0 } @@ -440,7 +440,7 @@ define <4 x float> @test24(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 ze ; KNL-NEXT: vfmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} ; KNL-NEXT: retq entry: - %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c + %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %c %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 8) #10 ret <4 x float> %0 } @@ -451,8 +451,8 @@ define <16 x float> @test25(<16 x float> %a, <16 x float> %b, <16 x float> %c) ; CHECK-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ; CHECK-NEXT: retq entry: - %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b - %sub.i.2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c + %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %b + %sub.i.2 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %c %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %sub.i, <16 x float> %sub.i.2, i32 8) #2 ret <16 x float> %0 } </cut>

4 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tx1/gnu-release-aarch64-spec2k6-O2 - Build # 25 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *gcc* in CI configuration tcwg_bmk_gnu_tx1/gnu-release-aarch64-spec2k6-O2. So far, this commit has regressed CI configurations: - tcwg_bmk_gnu_tx1/gnu-release-aarch64-spec2k6-O2 Culprit: <cut> commit df7c22831f1e48dba49479c5960c1c180d8eab2c Author: Richard Sandiford <richard.sandiford(a)arm.com> Date: Thu Nov 14 15:12:58 2019 +0000 Support vectorisation with mixed vector sizes After previous patches, it's now possible to make the vectoriser support multiple vector sizes in the same vector region, using related_vector_mode to pick the right vector mode for a given element mode. No port yet takes advantage of this, but I have a follow-on patch for AArch64. This patch also seemed like a good opportunity to add some more dump messages: one to make it clear which vector size/mode was being used when analysis passed or failed, and another to say when we've decided to skip a redundant vector size/mode. 2019-11-14 Richard Sandiford <richard.sandiford(a)arm.com> gcc/ * machmode.h (opt_machine_mode::operator==): New function. (opt_machine_mode::operator!=): Likewise. * tree-vectorizer.h (vec_info::vector_mode): Update comment. (get_related_vectype_for_scalar_type): Delete. (get_vectype_for_scalar_type_and_size): Declare. * tree-vect-slp.c (vect_slp_bb_region): Print dump messages to say whether analysis passed or failed, and with what vector modes. Use related_vector_mode to check whether trying a particular vector mode would be redundant with the autodetected mode, and print a dump message if we decide to skip it. * tree-vect-loop.c (vect_analyze_loop): Likewise. (vect_create_epilog_for_reduction): Use get_related_vectype_for_scalar_type instead of get_vectype_for_scalar_type_and_size. * tree-vect-stmts.c (get_vectype_for_scalar_type_and_size): Replace with... (get_related_vectype_for_scalar_type): ...this new function. Take a starting/"prevailing" vector mode rather than a vector size. Take an optional nunits argument, with the same meaning as for related_vector_mode. Use related_vector_mode when not auto-detecting a mode, falling back to mode_for_vector if no target mode exists. (get_vectype_for_scalar_type): Update accordingly. (get_same_sized_vectype): Likewise. * tree-vectorizer.c (get_vec_alignment_for_array_type): Likewise. From-SVN: r278240 </cut> Results regressed to (for first_bad == df7c22831f1e48dba49479c5960c1c180d8eab2c) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # true: 0 # benchmark -- -O2 artifacts/build-df7c22831f1e48dba49479c5960c1c180d8eab2c/results_id: 1 # 453.povray,[.] _ZN3povL24All_Sphere_IntersectionsEPNS_13Objec regressed by 114 # 482.sphinx3,[.] subvq_mgau_shortlist regressed by 112 from (for last_good == 7f52eb891b738337d5cf82c7c440a5eea8c7b0c9) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # true: 0 # benchmark -- -O2 artifacts/build-7f52eb891b738337d5cf82c7c440a5eea8c7b0c9/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… Results ID of last_good: tx1_64/tcwg_bmk_gnu_tx1/bisect-gnu-release-aarch64-spec2k6-O2/3483 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… Results ID of first_bad: tx1_64/tcwg_bmk_gnu_tx1/bisect-gnu-release-aarch64-spec2k6-O2/3492 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… Configuration details: Reproduce builds: <cut> mkdir investigate-gcc-df7c22831f1e48dba49479c5960c1c180d8eab2c cd investigate-gcc-df7c22831f1e48dba49479c5960c1c180d8eab2c git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /gcc/ ./ ./bisect/baseline/ cd gcc # Reproduce first_bad build git checkout --detach df7c22831f1e48dba49479c5960c1c180d8eab2c ../artifacts/test.sh # Reproduce last_good build git checkout --detach 7f52eb891b738337d5cf82c7c440a5eea8c7b0c9 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… Full commit (up to 1000 lines): <cut> commit df7c22831f1e48dba49479c5960c1c180d8eab2c Author: Richard Sandiford <richard.sandiford(a)arm.com> Date: Thu Nov 14 15:12:58 2019 +0000 Support vectorisation with mixed vector sizes After previous patches, it's now possible to make the vectoriser support multiple vector sizes in the same vector region, using related_vector_mode to pick the right vector mode for a given element mode. No port yet takes advantage of this, but I have a follow-on patch for AArch64. This patch also seemed like a good opportunity to add some more dump messages: one to make it clear which vector size/mode was being used when analysis passed or failed, and another to say when we've decided to skip a redundant vector size/mode. 2019-11-14 Richard Sandiford <richard.sandiford(a)arm.com> gcc/ * machmode.h (opt_machine_mode::operator==): New function. (opt_machine_mode::operator!=): Likewise. * tree-vectorizer.h (vec_info::vector_mode): Update comment. (get_related_vectype_for_scalar_type): Delete. (get_vectype_for_scalar_type_and_size): Declare. * tree-vect-slp.c (vect_slp_bb_region): Print dump messages to say whether analysis passed or failed, and with what vector modes. Use related_vector_mode to check whether trying a particular vector mode would be redundant with the autodetected mode, and print a dump message if we decide to skip it. * tree-vect-loop.c (vect_analyze_loop): Likewise. (vect_create_epilog_for_reduction): Use get_related_vectype_for_scalar_type instead of get_vectype_for_scalar_type_and_size. * tree-vect-stmts.c (get_vectype_for_scalar_type_and_size): Replace with... (get_related_vectype_for_scalar_type): ...this new function. Take a starting/"prevailing" vector mode rather than a vector size. Take an optional nunits argument, with the same meaning as for related_vector_mode. Use related_vector_mode when not auto-detecting a mode, falling back to mode_for_vector if no target mode exists. (get_vectype_for_scalar_type): Update accordingly. (get_same_sized_vectype): Likewise. * tree-vectorizer.c (get_vec_alignment_for_array_type): Likewise. From-SVN: r278240 --- gcc/ChangeLog | 28 +++++++++++++++++++++++++ gcc/machmode.h | 3 +++ gcc/tree-vect-loop.c | 54 +++++++++++++++++++++++++++++++++++------------- gcc/tree-vect-slp.c | 33 +++++++++++++++++++++++++---- gcc/tree-vect-stmts.c | 57 ++++++++++++++++++++++++++++++++++++--------------- gcc/tree-vectorizer.c | 2 +- gcc/tree-vectorizer.h | 8 +++++--- 7 files changed, 147 insertions(+), 38 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 41c94140b1a..680aa85121a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,31 @@ +2019-11-14 Richard Sandiford <richard.sandiford(a)arm.com> + + * machmode.h (opt_machine_mode::operator==): New function. + (opt_machine_mode::operator!=): Likewise. + * tree-vectorizer.h (vec_info::vector_mode): Update comment. + (get_related_vectype_for_scalar_type): Delete. + (get_vectype_for_scalar_type_and_size): Declare. + * tree-vect-slp.c (vect_slp_bb_region): Print dump messages to say + whether analysis passed or failed, and with what vector modes. + Use related_vector_mode to check whether trying a particular + vector mode would be redundant with the autodetected mode, + and print a dump message if we decide to skip it. + * tree-vect-loop.c (vect_analyze_loop): Likewise. + (vect_create_epilog_for_reduction): Use + get_related_vectype_for_scalar_type instead of + get_vectype_for_scalar_type_and_size. + * tree-vect-stmts.c (get_vectype_for_scalar_type_and_size): Replace + with... + (get_related_vectype_for_scalar_type): ...this new function. + Take a starting/"prevailing" vector mode rather than a vector size. + Take an optional nunits argument, with the same meaning as for + related_vector_mode. Use related_vector_mode when not + auto-detecting a mode, falling back to mode_for_vector if no + target mode exists. + (get_vectype_for_scalar_type): Update accordingly. + (get_same_sized_vectype): Likewise. + * tree-vectorizer.c (get_vec_alignment_for_array_type): Likewise. + 2019-11-14 Richard Sandiford <richard.sandiford(a)arm.com> * tree-vect-stmts.c (vectorizable_call): Require the types diff --git a/gcc/machmode.h b/gcc/machmode.h index 6750833c2fe..a507ed66c3f 100644 --- a/gcc/machmode.h +++ b/gcc/machmode.h @@ -258,6 +258,9 @@ public: bool exists () const; template<typename U> bool exists (U *) const; + bool operator== (const T &m) const { return m_mode == m; } + bool operator!= (const T &m) const { return m_mode != m; } + private: machine_mode m_mode; }; diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 213d620ed2c..e60c159d11a 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -2435,6 +2435,17 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) res = vect_analyze_loop_2 (loop_vinfo, fatal, &n_stmts); if (mode_i == 0) autodetected_vector_mode = loop_vinfo->vector_mode; + if (dump_enabled_p ()) + { + if (res) + dump_printf_loc (MSG_NOTE, vect_location, + "***** Analysis succeeded with vector mode %s\n", + GET_MODE_NAME (loop_vinfo->vector_mode)); + else + dump_printf_loc (MSG_NOTE, vect_location, + "***** Analysis failed with vector mode %s\n", + GET_MODE_NAME (loop_vinfo->vector_mode)); + } loop->aux = NULL; if (res) @@ -2501,9 +2512,22 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) } if (mode_i < vector_modes.length () - && known_eq (GET_MODE_SIZE (vector_modes[mode_i]), - GET_MODE_SIZE (autodetected_vector_mode))) - mode_i += 1; + && VECTOR_MODE_P (autodetected_vector_mode) + && (related_vector_mode (vector_modes[mode_i], + GET_MODE_INNER (autodetected_vector_mode)) + == autodetected_vector_mode) + && (related_vector_mode (autodetected_vector_mode, + GET_MODE_INNER (vector_modes[mode_i])) + == vector_modes[mode_i])) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "***** Skipping vector mode %s, which would" + " repeat the analysis for %s\n", + GET_MODE_NAME (vector_modes[mode_i]), + GET_MODE_NAME (autodetected_vector_mode)); + mode_i += 1; + } if (mode_i == vector_modes.length () || autodetected_vector_mode == VOIDmode) @@ -4898,13 +4922,14 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info, halves against each other. */ enum machine_mode mode1 = mode; tree stype = TREE_TYPE (vectype); - unsigned sz = tree_to_uhwi (TYPE_SIZE_UNIT (vectype)); - unsigned sz1 = sz; + unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype).to_constant (); + unsigned nunits1 = nunits; if (!slp_reduc && (mode1 = targetm.vectorize.split_reduction (mode)) != mode) - sz1 = GET_MODE_SIZE (mode1).to_constant (); + nunits1 = GET_MODE_NUNITS (mode1).to_constant (); - tree vectype1 = get_vectype_for_scalar_type_and_size (stype, sz1); + tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype), + stype, nunits1); reduce_with_shift = have_whole_vector_shift (mode1); if (!VECTOR_MODE_P (mode1)) reduce_with_shift = false; @@ -4918,11 +4943,13 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info, /* First reduce the vector to the desired vector size we should do shift reduction on by combining upper and lower halves. */ new_temp = new_phi_result; - while (sz > sz1) + while (nunits > nunits1) { gcc_assert (!slp_reduc); - sz /= 2; - vectype1 = get_vectype_for_scalar_type_and_size (stype, sz); + nunits /= 2; + vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype), + stype, nunits); + unsigned int bitsize = tree_to_uhwi (TYPE_SIZE (vectype1)); /* The target has to make sure we support lowpart/highpart extraction, either via direct vector extract or through @@ -4947,15 +4974,14 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info, = gimple_build_assign (dst2, BIT_FIELD_REF, build3 (BIT_FIELD_REF, vectype1, new_temp, TYPE_SIZE (vectype1), - bitsize_int (sz * BITS_PER_UNIT))); + bitsize_int (bitsize))); gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); } else { /* Extract via punning to appropriately sized integer mode vector. */ - tree eltype = build_nonstandard_integer_type (sz * BITS_PER_UNIT, - 1); + tree eltype = build_nonstandard_integer_type (bitsize, 1); tree etype = build_vector_type (eltype, 2); gcc_assert (convert_optab_handler (vec_extract_optab, TYPE_MODE (etype), @@ -4984,7 +5010,7 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info, = gimple_build_assign (tem, BIT_FIELD_REF, build3 (BIT_FIELD_REF, eltype, new_temp, TYPE_SIZE (eltype), - bitsize_int (sz * BITS_PER_UNIT))); + bitsize_int (bitsize))); gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); dst2 = make_ssa_name (vectype1); epilog_stmt = gimple_build_assign (dst2, VIEW_CONVERT_EXPR, diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 3885d9cbe4a..1e00db5a326 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -3203,7 +3203,12 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin, && dbg_cnt (vect_slp)) { if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n"); + { + dump_printf_loc (MSG_NOTE, vect_location, + "***** Analysis succeeded with vector mode" + " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode)); + dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n"); + } bb_vinfo->shared->check_datarefs (); vect_schedule_slp (bb_vinfo); @@ -3223,6 +3228,13 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin, vectorized = true; } + else + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "***** Analysis failed with vector mode %s\n", + GET_MODE_NAME (bb_vinfo->vector_mode)); + } if (mode_i == 0) autodetected_vector_mode = bb_vinfo->vector_mode; @@ -3230,9 +3242,22 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin, delete bb_vinfo; if (mode_i < vector_modes.length () - && known_eq (GET_MODE_SIZE (vector_modes[mode_i]), - GET_MODE_SIZE (autodetected_vector_mode))) - mode_i += 1; + && VECTOR_MODE_P (autodetected_vector_mode) + && (related_vector_mode (vector_modes[mode_i], + GET_MODE_INNER (autodetected_vector_mode)) + == autodetected_vector_mode) + && (related_vector_mode (autodetected_vector_mode, + GET_MODE_INNER (vector_modes[mode_i])) + == vector_modes[mode_i])) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "***** Skipping vector mode %s, which would" + " repeat the analysis for %s\n", + GET_MODE_NAME (vector_modes[mode_i]), + GET_MODE_NAME (autodetected_vector_mode)); + mode_i += 1; + } if (vectorized || mode_i == vector_modes.length () diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 80f59accad7..36f832bb522 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -11138,18 +11138,28 @@ vect_remove_stores (stmt_vec_info first_stmt_info) } } -/* Function get_vectype_for_scalar_type_and_size. +/* If NUNITS is nonzero, return a vector type that contains NUNITS + elements of type SCALAR_TYPE, or null if the target doesn't support + such a type. - Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported - by the target. */ + If NUNITS is zero, return a vector type that contains elements of + type SCALAR_TYPE, choosing whichever vector size the target prefers. + + If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode + for this vectorization region and want to "autodetect" the best choice. + Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE + and we want the new type to be interoperable with it. PREVAILING_MODE + in this case can be a scalar integer mode or a vector mode; when it + is a vector mode, the function acts like a tree-level version of + related_vector_mode. */ tree -get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size) +get_related_vectype_for_scalar_type (machine_mode prevailing_mode, + tree scalar_type, poly_uint64 nunits) { tree orig_scalar_type = scalar_type; scalar_mode inner_mode; machine_mode simd_mode; - poly_uint64 nunits; tree vectype; if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode) @@ -11189,10 +11199,11 @@ get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size) if (scalar_type == NULL_TREE) return NULL_TREE; - /* If no size was supplied use the mode the target prefers. Otherwise - lookup a vector mode of the specified size. */ - if (known_eq (size, 0U)) + /* If no prevailing mode was supplied, use the mode the target prefers. + Otherwise lookup a vector mode based on the prevailing mode. */ + if (prevailing_mode == VOIDmode) { + gcc_assert (known_eq (nunits, 0U)); simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode); if (SCALAR_INT_MODE_P (simd_mode)) { @@ -11208,9 +11219,19 @@ get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size) return NULL_TREE; } } - else if (!multiple_p (size, nbytes, &nunits) - || !mode_for_vector (inner_mode, nunits).exists (&simd_mode)) - return NULL_TREE; + else if (SCALAR_INT_MODE_P (prevailing_mode) + || !related_vector_mode (prevailing_mode, + inner_mode, nunits).exists (&simd_mode)) + { + /* Fall back to using mode_for_vector, mostly in the hope of being + able to use an integer mode. */ + if (known_eq (nunits, 0U) + && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits)) + return NULL_TREE; + + if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode)) + return NULL_TREE; + } vectype = build_vector_type_for_mode (scalar_type, simd_mode); @@ -11238,9 +11259,8 @@ get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size) tree get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type) { - tree vectype; - poly_uint64 vector_size = GET_MODE_SIZE (vinfo->vector_mode); - vectype = get_vectype_for_scalar_type_and_size (scalar_type, vector_size); + tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode, + scalar_type); if (vectype && vinfo->vector_mode == VOIDmode) vinfo->vector_mode = TYPE_MODE (vectype); return vectype; @@ -11273,8 +11293,13 @@ get_same_sized_vectype (tree scalar_type, tree vector_type) if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)) return truth_type_for (vector_type); - return get_vectype_for_scalar_type_and_size - (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type))); + poly_uint64 nunits; + if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)), + GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits)) + return NULL_TREE; + + return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type), + scalar_type, nunits); } /* Function vect_is_simple_use. diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index d6de78350e6..7be81a0b27f 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -1359,7 +1359,7 @@ get_vec_alignment_for_array_type (tree type) poly_uint64 array_size, vector_size; tree scalar_type = strip_array_types (type); - tree vectype = get_vectype_for_scalar_type_and_size (scalar_type, 0); + tree vectype = get_related_vectype_for_scalar_type (VOIDmode, scalar_type); if (!vectype || !poly_int_tree_p (TYPE_SIZE (type), &array_size) || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size) diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index f6efed1f863..fadc4d89d16 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -335,8 +335,9 @@ public: /* Cost data used by the target cost model. */ void *target_cost_data; - /* If we've chosen a vector size for this vectorization region, - this is one mode that has such a size, otherwise it is VOIDmode. */ + /* The argument we should pass to related_vector_mode when looking up + the vector mode for a scalar mode, or VOIDmode if we haven't yet + made any decisions about which vector modes to use. */ machine_mode vector_mode; private: @@ -1624,8 +1625,9 @@ extern bool vect_can_advance_ivs_p (loop_vec_info); extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code); /* In tree-vect-stmts.c. */ +extern tree get_related_vectype_for_scalar_type (machine_mode, tree, + poly_uint64 = 0); extern tree get_vectype_for_scalar_type (vec_info *, tree); -extern tree get_vectype_for_scalar_type_and_size (tree, poly_uint64); extern tree get_mask_type_for_scalar_type (vec_info *, tree); extern tree get_same_sized_vectype (tree, tree); extern bool vect_get_loop_mask_type (loop_vec_info); </cut>

4 years, 10 months

1
0
0 0

[ACTIVITY] week ending Aug. 15 2021

by Alex Bennée

VirtIO Initiative ([STR-9]) =========================== - posted Enabling hypervisor agnosticism for VirtIO backends Message-Id: <87v94ldrqq.fsf(a)linaro.org> VirtIO RPMB ([STR-5]) - made more progress and now have PROGRAM_KEY/WRITE_COUNTER done - feels like it's getting faster [hacking branch] <https://github.com/stsquad/virtio-rpmb/tree/hacking> Fix VirtIO spec as per Rucha's email ------------------------------------ QEMU Upstream Work ([UM-2]) =========================== - posted [PATCH for 6.1-rc3 v1 0/4] gitlab and plugins pre-PR Message-Id: <20210806141015.2487502-1-alex.bennee(a)linaro.org> - prepared a potential [pull request for testing issues] but looks like it will wait for 6.2 [pull request for testing issues] <https://github.com/stsquad/qemu/tree/pr/120821-for-6.1-rc4-1> Write a generic overview of vhost user usage for the manual Enable plugins by default on TCG builds - [X] clean-up testing matrix Completed Reviews [10/10] ========================= [PATCH 00/13] new plugin argument passing scheme Message-Id: <20210717100920.240793-1-ma.mandourr(a)gmail.com> [PATCH 0/9] new plugin argument passing scheme Message-Id: <20210716080345.136784-1-ma.mandourr(a)gmail.com> [RFC PATCH] Subject: [RFC PATCH] plugins: Passed the parsed arguments directly to plugins Message-Id: <20210623155553.481099-1-ma.mandourr(a)gmail.com> [PATCH 3/6] plugins/cache: Fixed a use-after-free bug with multithreaded usermode Message-Id: <20210714172151.8494-4-ma.mandourr(a)gmail.com> [PATCH v8] tests/tcg/s390x: Test SIGILL and SIGSEGV handling Message-Id: <20210804225146.154513-1-iii(a)linux.ibm.com> [RFC PATCH v2] Add a post for the new TCG cache modelling plugin Message-Id: <20210617121707.764126-1-ma.mandourr(a)gmail.com> [PATCH for 6.1] plugins: do not limit exported symbols if modules are active Message-Id: <20210811100550.54714-1-pbonzini(a)redhat.com> [PATCH v4 00/13] new plugin argument passing scheme Message-Id: <20210730135817.17816-1-ma.mandourr(a)gmail.com> [PATCH 0/6] docs/devel: Organize devel manual into further subsections Message-Id: <20210804005621.1577302-1-jsnow(a)redhat.com> [PATCH] Makefile: Fix cscope issues on MacOS and soft links Message-Id: <20210801171144.60412-1-peterx(a)redhat.com> Absences ======== - Another partial week - On holiday for rest of August Current Review Queue ==================== TODO [PATCH v3] accel/tcg: Clear PAGE_WRITE before translation Message-Id: <20210805204835.158918-1-iii(a)linux.ibm.com> ===================================================================================================================== TODO [PATCH 0/7] tcg: some small towards more modular tcg Message-Id: <20210804143826.3402872-1-kraxel(a)redhat.com> ================================================================================================================= TODO [PATCH 0/2] Acceptance Tests: clean up of temporary dirs and MAINTAINERS entry Message-Id: <20210803193447.3946219-1-crosa(a)redhat.com> ========================================================================================================================================== TODO [PATCH v2 00/11] Atomic cleanup + clang-12 build fix Message-Id: <20210717014121.1784956-1-richard.henderson(a)linaro.org> ============================================================================================================================ -- Alex Bennée

4 years, 10 months

1
0
0 0

[ACTIVITY] report week ending 13 Aug

by Peter Maydell

Progress: * UM-2 [QEMU upstream maintainership] + Getting rc3 out of the door + Finished the systick timer refactoring series and sent it out for review (it ended up weighing in at 25 patches...) + Worked through some Coverity issue reports to analyze them and either close as false-positive or send out patches fixing them -- PMM

4 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tx1/llvm-master-aarch64-spec2k6-O2 - Build # 14 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O2. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O2 Culprit: <cut> commit 2db8461a9492cb64046a085f35048b9c4e45bfc2 Author: Raphael Isemann <teemperor(a)gmail.com> Date: Tue Aug 10 16:15:57 2021 +0200 [lldb][NFC] Fix inversed documentation of Process::GetID/SetID </cut> Results regressed to (for first_bad == 2db8461a9492cb64046a085f35048b9c4e45bfc2) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2 artifacts/build-2db8461a9492cb64046a085f35048b9c4e45bfc2/results_id: 1 # 453.povray,[.] _ZN3povL24All_Sphere_IntersectionsEPNS_13Objec regressed by 116 from (for last_good == 9900af52f6b186a260d83321791177728fb369c5) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2 artifacts/build-9900af52f6b186a260d83321791177728fb369c5/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Results ID of last_good: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O2/3417 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Results ID of first_bad: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O2/3391 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-2db8461a9492cb64046a085f35048b9c4e45bfc2 cd investigate-llvm-2db8461a9492cb64046a085f35048b9c4e45bfc2 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach 2db8461a9492cb64046a085f35048b9c4e45bfc2 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 9900af52f6b186a260d83321791177728fb369c5 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Full commit (up to 1000 lines): <cut> commit 2db8461a9492cb64046a085f35048b9c4e45bfc2 Author: Raphael Isemann <teemperor(a)gmail.com> Date: Tue Aug 10 16:15:57 2021 +0200 [lldb][NFC] Fix inversed documentation of Process::GetID/SetID --- lldb/include/lldb/Target/Process.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h index aaa2470d2931..8dcc15b1667b 100644 --- a/lldb/include/lldb/Target/Process.h +++ b/lldb/include/lldb/Target/Process.h @@ -536,13 +536,13 @@ public: uint32_t GetAddressByteSize() const; + /// Returns the pid of the process or LLDB_INVALID_PROCESS_ID if there is + /// no known pid. + lldb::pid_t GetID() const { return m_pid; } + /// Sets the stored pid. /// /// This does not change the pid of underlying process. - lldb::pid_t GetID() const { return m_pid; } - - /// Returns the pid of the process or LLDB_INVALID_PROCESS_ID if there is - /// no known pid. void SetID(lldb::pid_t new_pid) { m_pid = new_pid; } uint32_t GetUniqueID() const { return m_process_unique_id; } </cut>

4 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_kernel/llvm-master-arm-next-allmodconfig - Build # 20 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *linux* in CI configuration tcwg_kernel/llvm-master-arm-next-allmodconfig. So far, this commit has regressed CI configurations: - tcwg_kernel/llvm-master-arm-next-allmodconfig Culprit: <cut> commit 4a2b285e7e103d4d6c6ed3e5052a0ff74a5d7f15 Author: Eric Dumazet <edumazet(a)google.com> Date: Tue Aug 10 02:45:47 2021 -0700 net: igmp: fix data-race in igmp_ifc_timer_expire() Fix the data-race reported by syzbot [1] Issue here is that igmp_ifc_timer_expire() can update in_dev->mr_ifc_count while another change just occured from another context. in_dev->mr_ifc_count is only 8bit wide, so the race had little consequences. [1] BUG: KCSAN: data-race in igmp_ifc_event / igmp_ifc_timer_expire write to 0xffff8881051e3062 of 1 bytes by task 12547 on cpu 0: igmp_ifc_event+0x1d5/0x290 net/ipv4/igmp.c:821 igmp_group_added+0x462/0x490 net/ipv4/igmp.c:1356 ____ip_mc_inc_group+0x3ff/0x500 net/ipv4/igmp.c:1461 __ip_mc_join_group+0x24d/0x2c0 net/ipv4/igmp.c:2199 ip_mc_join_group_ssm+0x20/0x30 net/ipv4/igmp.c:2218 do_ip_setsockopt net/ipv4/ip_sockglue.c:1285 [inline] ip_setsockopt+0x1827/0x2a80 net/ipv4/ip_sockglue.c:1423 tcp_setsockopt+0x8c/0xa0 net/ipv4/tcp.c:3657 sock_common_setsockopt+0x5d/0x70 net/core/sock.c:3362 __sys_setsockopt+0x18f/0x200 net/socket.c:2159 __do_sys_setsockopt net/socket.c:2170 [inline] __se_sys_setsockopt net/socket.c:2167 [inline] __x64_sys_setsockopt+0x62/0x70 net/socket.c:2167 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff8881051e3062 of 1 bytes by interrupt on cpu 1: igmp_ifc_timer_expire+0x706/0xa30 net/ipv4/igmp.c:808 call_timer_fn+0x2e/0x1d0 kernel/time/timer.c:1419 expire_timers+0x135/0x250 kernel/time/timer.c:1464 __run_timers+0x358/0x420 kernel/time/timer.c:1732 run_timer_softirq+0x19/0x30 kernel/time/timer.c:1745 __do_softirq+0x12c/0x26e kernel/softirq.c:558 invoke_softirq kernel/softirq.c:432 [inline] __irq_exit_rcu+0x9a/0xb0 kernel/softirq.c:636 sysvec_apic_timer_interrupt+0x69/0x80 arch/x86/kernel/apic/apic.c:1100 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:638 console_unlock+0x8e8/0xb30 kernel/printk/printk.c:2646 vprintk_emit+0x125/0x3d0 kernel/printk/printk.c:2174 vprintk_default+0x22/0x30 kernel/printk/printk.c:2185 vprintk+0x15a/0x170 kernel/printk/printk_safe.c:392 printk+0x62/0x87 kernel/printk/printk.c:2216 selinux_netlink_send+0x399/0x400 security/selinux/hooks.c:6041 security_netlink_send+0x42/0x90 security/security.c:2070 netlink_sendmsg+0x59e/0x7c0 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:703 [inline] sock_sendmsg net/socket.c:723 [inline] ____sys_sendmsg+0x360/0x4d0 net/socket.c:2392 ___sys_sendmsg net/socket.c:2446 [inline] __sys_sendmsg+0x1ed/0x270 net/socket.c:2475 __do_sys_sendmsg net/socket.c:2484 [inline] __se_sys_sendmsg net/socket.c:2482 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2482 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x01 -> 0x02 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 12539 Comm: syz-executor.1 Not tainted 5.14.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet <edumazet(a)google.com> Reported-by: syzbot <syzkaller(a)googlegroups.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> </cut> Results regressed to (for first_bad == 4a2b285e7e103d4d6c6ed3e5052a0ff74a5d7f15) # reset_artifacts: -10 # build_abe binutils: -9 # build_llvm: -5 # build_abe qemu: -2 # linux_n_obj: 21692 # First few build errors in logs: # 00:03:56 ld.lld: error: undefined symbol: __bad_cmpxchg # 00:03:56 make: *** [Makefile:1176: vmlinux] Error 1 from (for last_good == 37c86c4a0bfc2faaf0ed959db9de814c85797f09) # reset_artifacts: -10 # build_abe binutils: -9 # build_llvm: -5 # build_abe qemu: -2 # linux_n_obj: 29753 # linux build successful: all Artifacts of last_good build: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-arm-next-allm… Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-arm-next-allm… Build top page/logs: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-arm-next-allm… Configuration details: rr[linux_git]="https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git#761c6d7…" Reproduce builds: <cut> mkdir investigate-linux-4a2b285e7e103d4d6c6ed3e5052a0ff74a5d7f15 cd investigate-linux-4a2b285e7e103d4d6c6ed3e5052a0ff74a5d7f15 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-arm-next-allm… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-arm-next-allm… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-arm-next-allm… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_kernel-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /linux/ ./ ./bisect/baseline/ cd linux # Reproduce first_bad build git checkout --detach 4a2b285e7e103d4d6c6ed3e5052a0ff74a5d7f15 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 37c86c4a0bfc2faaf0ed959db9de814c85797f09 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-arm-next-allm… Build log: https://ci.linaro.org/job/tcwg_kernel-llvm-bisect-llvm-master-arm-next-allm… Full commit (up to 1000 lines): <cut> commit 4a2b285e7e103d4d6c6ed3e5052a0ff74a5d7f15 Author: Eric Dumazet <edumazet(a)google.com> Date: Tue Aug 10 02:45:47 2021 -0700 net: igmp: fix data-race in igmp_ifc_timer_expire() Fix the data-race reported by syzbot [1] Issue here is that igmp_ifc_timer_expire() can update in_dev->mr_ifc_count while another change just occured from another context. in_dev->mr_ifc_count is only 8bit wide, so the race had little consequences. [1] BUG: KCSAN: data-race in igmp_ifc_event / igmp_ifc_timer_expire write to 0xffff8881051e3062 of 1 bytes by task 12547 on cpu 0: igmp_ifc_event+0x1d5/0x290 net/ipv4/igmp.c:821 igmp_group_added+0x462/0x490 net/ipv4/igmp.c:1356 ____ip_mc_inc_group+0x3ff/0x500 net/ipv4/igmp.c:1461 __ip_mc_join_group+0x24d/0x2c0 net/ipv4/igmp.c:2199 ip_mc_join_group_ssm+0x20/0x30 net/ipv4/igmp.c:2218 do_ip_setsockopt net/ipv4/ip_sockglue.c:1285 [inline] ip_setsockopt+0x1827/0x2a80 net/ipv4/ip_sockglue.c:1423 tcp_setsockopt+0x8c/0xa0 net/ipv4/tcp.c:3657 sock_common_setsockopt+0x5d/0x70 net/core/sock.c:3362 __sys_setsockopt+0x18f/0x200 net/socket.c:2159 __do_sys_setsockopt net/socket.c:2170 [inline] __se_sys_setsockopt net/socket.c:2167 [inline] __x64_sys_setsockopt+0x62/0x70 net/socket.c:2167 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff8881051e3062 of 1 bytes by interrupt on cpu 1: igmp_ifc_timer_expire+0x706/0xa30 net/ipv4/igmp.c:808 call_timer_fn+0x2e/0x1d0 kernel/time/timer.c:1419 expire_timers+0x135/0x250 kernel/time/timer.c:1464 __run_timers+0x358/0x420 kernel/time/timer.c:1732 run_timer_softirq+0x19/0x30 kernel/time/timer.c:1745 __do_softirq+0x12c/0x26e kernel/softirq.c:558 invoke_softirq kernel/softirq.c:432 [inline] __irq_exit_rcu+0x9a/0xb0 kernel/softirq.c:636 sysvec_apic_timer_interrupt+0x69/0x80 arch/x86/kernel/apic/apic.c:1100 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:638 console_unlock+0x8e8/0xb30 kernel/printk/printk.c:2646 vprintk_emit+0x125/0x3d0 kernel/printk/printk.c:2174 vprintk_default+0x22/0x30 kernel/printk/printk.c:2185 vprintk+0x15a/0x170 kernel/printk/printk_safe.c:392 printk+0x62/0x87 kernel/printk/printk.c:2216 selinux_netlink_send+0x399/0x400 security/selinux/hooks.c:6041 security_netlink_send+0x42/0x90 security/security.c:2070 netlink_sendmsg+0x59e/0x7c0 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:703 [inline] sock_sendmsg net/socket.c:723 [inline] ____sys_sendmsg+0x360/0x4d0 net/socket.c:2392 ___sys_sendmsg net/socket.c:2446 [inline] __sys_sendmsg+0x1ed/0x270 net/socket.c:2475 __do_sys_sendmsg net/socket.c:2484 [inline] __se_sys_sendmsg net/socket.c:2482 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2482 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x01 -> 0x02 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 12539 Comm: syz-executor.1 Not tainted 5.14.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet <edumazet(a)google.com> Reported-by: syzbot <syzkaller(a)googlegroups.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> --- net/ipv4/igmp.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6b3c558a4f23..a51360087b19 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -803,10 +803,17 @@ static void igmp_gq_timer_expire(struct timer_list *t) static void igmp_ifc_timer_expire(struct timer_list *t) { struct in_device *in_dev = from_timer(in_dev, t, mr_ifc_timer); + u8 mr_ifc_count; igmpv3_send_cr(in_dev); - if (in_dev->mr_ifc_count) { - in_dev->mr_ifc_count--; +restart: + mr_ifc_count = READ_ONCE(in_dev->mr_ifc_count); + + if (mr_ifc_count) { + if (cmpxchg(&in_dev->mr_ifc_count, + mr_ifc_count, + mr_ifc_count - 1) != mr_ifc_count) + goto restart; igmp_ifc_start_timer(in_dev, unsolicited_report_interval(in_dev)); } @@ -818,7 +825,7 @@ static void igmp_ifc_event(struct in_device *in_dev) struct net *net = dev_net(in_dev->dev); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) return; - in_dev->mr_ifc_count = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv); igmp_ifc_start_timer(in_dev, 1); } @@ -957,7 +964,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, in_dev->mr_qri; } /* cancel the interface change timer */ - in_dev->mr_ifc_count = 0; + WRITE_ONCE(in_dev->mr_ifc_count, 0); if (del_timer(&in_dev->mr_ifc_timer)) __in_dev_put(in_dev); /* clear deleted report items */ @@ -1724,7 +1731,7 @@ void ip_mc_down(struct in_device *in_dev) igmp_group_dropped(pmc); #ifdef CONFIG_IP_MULTICAST - in_dev->mr_ifc_count = 0; + WRITE_ONCE(in_dev->mr_ifc_count, 0); if (del_timer(&in_dev->mr_ifc_timer)) __in_dev_put(in_dev); in_dev->mr_gq_running = 0; @@ -1941,7 +1948,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; - in_dev->mr_ifc_count = pmc->crcount; + WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; igmp_ifc_event(pmc->interface); @@ -2120,7 +2127,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, /* else no filters; keep old mode for reports */ pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; - in_dev->mr_ifc_count = pmc->crcount; + WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; igmp_ifc_event(in_dev); </cut>

4 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_kernel/gnu-master-arm-next-allyesconfig - Build # 34 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *linux* in CI configuration tcwg_kernel/gnu-master-arm-next-allyesconfig. So far, this commit has regressed CI configurations: - tcwg_kernel/gnu-master-arm-next-allyesconfig Culprit: <cut> commit 4a2b285e7e103d4d6c6ed3e5052a0ff74a5d7f15 Author: Eric Dumazet <edumazet(a)google.com> Date: Tue Aug 10 02:45:47 2021 -0700 net: igmp: fix data-race in igmp_ifc_timer_expire() Fix the data-race reported by syzbot [1] Issue here is that igmp_ifc_timer_expire() can update in_dev->mr_ifc_count while another change just occured from another context. in_dev->mr_ifc_count is only 8bit wide, so the race had little consequences. [1] BUG: KCSAN: data-race in igmp_ifc_event / igmp_ifc_timer_expire write to 0xffff8881051e3062 of 1 bytes by task 12547 on cpu 0: igmp_ifc_event+0x1d5/0x290 net/ipv4/igmp.c:821 igmp_group_added+0x462/0x490 net/ipv4/igmp.c:1356 ____ip_mc_inc_group+0x3ff/0x500 net/ipv4/igmp.c:1461 __ip_mc_join_group+0x24d/0x2c0 net/ipv4/igmp.c:2199 ip_mc_join_group_ssm+0x20/0x30 net/ipv4/igmp.c:2218 do_ip_setsockopt net/ipv4/ip_sockglue.c:1285 [inline] ip_setsockopt+0x1827/0x2a80 net/ipv4/ip_sockglue.c:1423 tcp_setsockopt+0x8c/0xa0 net/ipv4/tcp.c:3657 sock_common_setsockopt+0x5d/0x70 net/core/sock.c:3362 __sys_setsockopt+0x18f/0x200 net/socket.c:2159 __do_sys_setsockopt net/socket.c:2170 [inline] __se_sys_setsockopt net/socket.c:2167 [inline] __x64_sys_setsockopt+0x62/0x70 net/socket.c:2167 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff8881051e3062 of 1 bytes by interrupt on cpu 1: igmp_ifc_timer_expire+0x706/0xa30 net/ipv4/igmp.c:808 call_timer_fn+0x2e/0x1d0 kernel/time/timer.c:1419 expire_timers+0x135/0x250 kernel/time/timer.c:1464 __run_timers+0x358/0x420 kernel/time/timer.c:1732 run_timer_softirq+0x19/0x30 kernel/time/timer.c:1745 __do_softirq+0x12c/0x26e kernel/softirq.c:558 invoke_softirq kernel/softirq.c:432 [inline] __irq_exit_rcu+0x9a/0xb0 kernel/softirq.c:636 sysvec_apic_timer_interrupt+0x69/0x80 arch/x86/kernel/apic/apic.c:1100 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:638 console_unlock+0x8e8/0xb30 kernel/printk/printk.c:2646 vprintk_emit+0x125/0x3d0 kernel/printk/printk.c:2174 vprintk_default+0x22/0x30 kernel/printk/printk.c:2185 vprintk+0x15a/0x170 kernel/printk/printk_safe.c:392 printk+0x62/0x87 kernel/printk/printk.c:2216 selinux_netlink_send+0x399/0x400 security/selinux/hooks.c:6041 security_netlink_send+0x42/0x90 security/security.c:2070 netlink_sendmsg+0x59e/0x7c0 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:703 [inline] sock_sendmsg net/socket.c:723 [inline] ____sys_sendmsg+0x360/0x4d0 net/socket.c:2392 ___sys_sendmsg net/socket.c:2446 [inline] __sys_sendmsg+0x1ed/0x270 net/socket.c:2475 __do_sys_sendmsg net/socket.c:2484 [inline] __se_sys_sendmsg net/socket.c:2482 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2482 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x01 -> 0x02 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 12539 Comm: syz-executor.1 Not tainted 5.14.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet <edumazet(a)google.com> Reported-by: syzbot <syzkaller(a)googlegroups.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> </cut> Results regressed to (for first_bad == 4a2b285e7e103d4d6c6ed3e5052a0ff74a5d7f15) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1: -5 # build_abe qemu: -2 # linux_n_obj: 19624 # First few build errors in logs: # 00:49:46 igmp.c:(.text+0xa6f4): undefined reference to `__bad_cmpxchg' # 00:49:48 make: *** [Makefile:1176: vmlinux] Error 1 from (for last_good == 37c86c4a0bfc2faaf0ed959db9de814c85797f09) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1: -5 # build_abe qemu: -2 # linux_n_obj: 19709 # linux build successful: all Artifacts of last_good build: https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-next-allyes… Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-next-allyes… Build top page/logs: https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-next-allyes… Configuration details: rr[linux_git]="https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git#9e723c5…" Reproduce builds: <cut> mkdir investigate-linux-4a2b285e7e103d4d6c6ed3e5052a0ff74a5d7f15 cd investigate-linux-4a2b285e7e103d4d6c6ed3e5052a0ff74a5d7f15 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-next-allyes… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-next-allyes… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-next-allyes… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_kernel-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /linux/ ./ ./bisect/baseline/ cd linux # Reproduce first_bad build git checkout --detach 4a2b285e7e103d4d6c6ed3e5052a0ff74a5d7f15 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 37c86c4a0bfc2faaf0ed959db9de814c85797f09 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-next-allyes… Build log: https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-next-allyes… Full commit (up to 1000 lines): <cut> commit 4a2b285e7e103d4d6c6ed3e5052a0ff74a5d7f15 Author: Eric Dumazet <edumazet(a)google.com> Date: Tue Aug 10 02:45:47 2021 -0700 net: igmp: fix data-race in igmp_ifc_timer_expire() Fix the data-race reported by syzbot [1] Issue here is that igmp_ifc_timer_expire() can update in_dev->mr_ifc_count while another change just occured from another context. in_dev->mr_ifc_count is only 8bit wide, so the race had little consequences. [1] BUG: KCSAN: data-race in igmp_ifc_event / igmp_ifc_timer_expire write to 0xffff8881051e3062 of 1 bytes by task 12547 on cpu 0: igmp_ifc_event+0x1d5/0x290 net/ipv4/igmp.c:821 igmp_group_added+0x462/0x490 net/ipv4/igmp.c:1356 ____ip_mc_inc_group+0x3ff/0x500 net/ipv4/igmp.c:1461 __ip_mc_join_group+0x24d/0x2c0 net/ipv4/igmp.c:2199 ip_mc_join_group_ssm+0x20/0x30 net/ipv4/igmp.c:2218 do_ip_setsockopt net/ipv4/ip_sockglue.c:1285 [inline] ip_setsockopt+0x1827/0x2a80 net/ipv4/ip_sockglue.c:1423 tcp_setsockopt+0x8c/0xa0 net/ipv4/tcp.c:3657 sock_common_setsockopt+0x5d/0x70 net/core/sock.c:3362 __sys_setsockopt+0x18f/0x200 net/socket.c:2159 __do_sys_setsockopt net/socket.c:2170 [inline] __se_sys_setsockopt net/socket.c:2167 [inline] __x64_sys_setsockopt+0x62/0x70 net/socket.c:2167 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff8881051e3062 of 1 bytes by interrupt on cpu 1: igmp_ifc_timer_expire+0x706/0xa30 net/ipv4/igmp.c:808 call_timer_fn+0x2e/0x1d0 kernel/time/timer.c:1419 expire_timers+0x135/0x250 kernel/time/timer.c:1464 __run_timers+0x358/0x420 kernel/time/timer.c:1732 run_timer_softirq+0x19/0x30 kernel/time/timer.c:1745 __do_softirq+0x12c/0x26e kernel/softirq.c:558 invoke_softirq kernel/softirq.c:432 [inline] __irq_exit_rcu+0x9a/0xb0 kernel/softirq.c:636 sysvec_apic_timer_interrupt+0x69/0x80 arch/x86/kernel/apic/apic.c:1100 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:638 console_unlock+0x8e8/0xb30 kernel/printk/printk.c:2646 vprintk_emit+0x125/0x3d0 kernel/printk/printk.c:2174 vprintk_default+0x22/0x30 kernel/printk/printk.c:2185 vprintk+0x15a/0x170 kernel/printk/printk_safe.c:392 printk+0x62/0x87 kernel/printk/printk.c:2216 selinux_netlink_send+0x399/0x400 security/selinux/hooks.c:6041 security_netlink_send+0x42/0x90 security/security.c:2070 netlink_sendmsg+0x59e/0x7c0 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:703 [inline] sock_sendmsg net/socket.c:723 [inline] ____sys_sendmsg+0x360/0x4d0 net/socket.c:2392 ___sys_sendmsg net/socket.c:2446 [inline] __sys_sendmsg+0x1ed/0x270 net/socket.c:2475 __do_sys_sendmsg net/socket.c:2484 [inline] __se_sys_sendmsg net/socket.c:2482 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2482 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x01 -> 0x02 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 12539 Comm: syz-executor.1 Not tainted 5.14.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet <edumazet(a)google.com> Reported-by: syzbot <syzkaller(a)googlegroups.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> --- net/ipv4/igmp.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6b3c558a4f23..a51360087b19 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -803,10 +803,17 @@ static void igmp_gq_timer_expire(struct timer_list *t) static void igmp_ifc_timer_expire(struct timer_list *t) { struct in_device *in_dev = from_timer(in_dev, t, mr_ifc_timer); + u8 mr_ifc_count; igmpv3_send_cr(in_dev); - if (in_dev->mr_ifc_count) { - in_dev->mr_ifc_count--; +restart: + mr_ifc_count = READ_ONCE(in_dev->mr_ifc_count); + + if (mr_ifc_count) { + if (cmpxchg(&in_dev->mr_ifc_count, + mr_ifc_count, + mr_ifc_count - 1) != mr_ifc_count) + goto restart; igmp_ifc_start_timer(in_dev, unsolicited_report_interval(in_dev)); } @@ -818,7 +825,7 @@ static void igmp_ifc_event(struct in_device *in_dev) struct net *net = dev_net(in_dev->dev); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) return; - in_dev->mr_ifc_count = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv); igmp_ifc_start_timer(in_dev, 1); } @@ -957,7 +964,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, in_dev->mr_qri; } /* cancel the interface change timer */ - in_dev->mr_ifc_count = 0; + WRITE_ONCE(in_dev->mr_ifc_count, 0); if (del_timer(&in_dev->mr_ifc_timer)) __in_dev_put(in_dev); /* clear deleted report items */ @@ -1724,7 +1731,7 @@ void ip_mc_down(struct in_device *in_dev) igmp_group_dropped(pmc); #ifdef CONFIG_IP_MULTICAST - in_dev->mr_ifc_count = 0; + WRITE_ONCE(in_dev->mr_ifc_count, 0); if (del_timer(&in_dev->mr_ifc_timer)) __in_dev_put(in_dev); in_dev->mr_gq_running = 0; @@ -1941,7 +1948,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; - in_dev->mr_ifc_count = pmc->crcount; + WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; igmp_ifc_event(pmc->interface); @@ -2120,7 +2127,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, /* else no filters; keep old mode for reports */ pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; - in_dev->mr_ifc_count = pmc->crcount; + WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; igmp_ifc_event(in_dev); </cut>

4 years, 10 months

1
0
0 0

GDB aarch64 malfunctions w/Linaro / ARM gcc 10.3 compiler

by Dietmar May

I'm compiling and running a bare metal AArch64 bootloader using 3 different compilers: the Linaro / ARM GCC 10.3.1 compiler, the Linaro / ARM GCC 10.2.1 compiler, and an in-house built GCC 10.2.0 compiler. GDB will single step using the either of the GCC 10.2 compilers; but runs without halting when step is requested - or perhaps steps multiple instructions - when built using the Linaro / ARM-supplied GCC 10.3.1. Eclipse CDT (v4.20 aka 2021-06) is able to correlate debugging information from binaries built with either of the gcc 10.2 toolchains, and to single step correctly through the program. Breakpoints work as expected. Registers display fine. Eclipse CDT is not able to correlate current PC location to source code using the binary built with Linaro / ARM 10.3, instead bringing up a disassembly window. Breakpoints placed at assembly instructions in the editor do not work. I've tried three different GDB versions - ARM's supplied 10.2 and 10.3 GDB, and the in-house built GDB. Results are the same. The same makefile is used to create the binaries, with just a few macro definitions to switch. The only compiler flag of interest is -march=armv8.2-a (and of course -g -O0). -mtune=cortex-a53 doesn't help. The board is connected via JTAG using OpenOCD 0.11.0+ and an Olimex ARM-USB-OCD-H adapter. I'm building in a cygwin shell on Windows 10 version 21H1 using the compilers: gcc-arm-10.3-2021.07-mingw-w64-i686-aarch64-none-elf.tar.xz gcc-arm-10.2-2020.11-mingw-w64-i686-aarch64-none-elf.tar.xz downloaded from: https://developer.arm.com/tools-and-software/open-source-software/developer… Differences in compiler configuration (gcc -v) are: Failing - Linaro / ARM GCC 10.3(.1): --enable-checking=release --target=aarch64-none-elf --with-libiconv-prefix=/data/jenkins/workspace/GNU-toolchain/arm-10-4/build-mingw-aarch64-none-elf/host-tools Working - in house GCC 10.2.1: --build=x86_64-w64-mingw32 --disable-libffi --disable-libgomp --disable-libmudflap --disable-libssp --disable-libstdcxx-pch --disable-lto --disable-win32-registry --enable-multilib --target=aarch64-elf --with-gcc --with-gnu-as --with-gnu-ld --with-host-libstdcxx='-static-libgcc -Wl,-Bstatic,-lstdc++,-Bdynamic -lm' --with-multilib-list=lp64,ilp32 --with-stabs --with-sysroot=/build/aarch64-elf_10.2.0/cross-gcc/aarch64-elf --with-zstd=/build/aarch64-elf_10.2.0/host Has anyone been able to perform hardware debugging of binaries built with the latest 10.3 builds using GDB (and maybe even Eclipse CDT)? Any suggestions as to other steps to try? Thanks.

4 years, 10 months

2
1
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tx1/llvm-master-aarch64-spec2k6-O3 - Build # 18 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O3. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O3 Culprit: <cut> commit b4c0307d598004cfd96c770d2a4a84a37c838ba9 Author: Jon Roelofs <jonathan_roelofs(a)apple.com> Date: Thu Aug 5 09:35:02 2021 -0700 Fix clang-interpreter build after 2487db1f286222e2501c2fa8e8244eda13f6afc3 </cut> Results regressed to (for first_bad == b4c0307d598004cfd96c770d2a4a84a37c838ba9) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3 artifacts/build-b4c0307d598004cfd96c770d2a4a84a37c838ba9/results_id: 1 # 470.lbm,lbm_base.default regressed by 109 from (for last_good == bd17ced1db9a674fc8aa6632899e245672c7aa35) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3 artifacts/build-bd17ced1db9a674fc8aa6632899e245672c7aa35/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Results ID of last_good: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O3/3351 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Results ID of first_bad: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O3/3314 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-b4c0307d598004cfd96c770d2a4a84a37c838ba9 cd investigate-llvm-b4c0307d598004cfd96c770d2a4a84a37c838ba9 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach b4c0307d598004cfd96c770d2a4a84a37c838ba9 ../artifacts/test.sh # Reproduce last_good build git checkout --detach bd17ced1db9a674fc8aa6632899e245672c7aa35 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Full commit (up to 1000 lines): <cut> commit b4c0307d598004cfd96c770d2a4a84a37c838ba9 Author: Jon Roelofs <jonathan_roelofs(a)apple.com> Date: Thu Aug 5 09:35:02 2021 -0700 Fix clang-interpreter build after 2487db1f286222e2501c2fa8e8244eda13f6afc3 --- clang/examples/clang-interpreter/main.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/examples/clang-interpreter/main.cpp b/clang/examples/clang-interpreter/main.cpp index 342d42089472..a2c50167f6b1 100644 --- a/clang/examples/clang-interpreter/main.cpp +++ b/clang/examples/clang-interpreter/main.cpp @@ -66,7 +66,8 @@ private: SimpleJIT( std::unique_ptr<TargetMachine> TM, DataLayout DL, std::unique_ptr<DynamicLibrarySearchGenerator> ProcessSymbolsGenerator) - : TM(std::move(TM)), DL(std::move(DL)) { + : ES(cantFail(SelfExecutorProcessControl::Create())), TM(std::move(TM)), + DL(std::move(DL)) { llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr); MainJD.addGenerator(std::move(ProcessSymbolsGenerator)); } </cut>

4 years, 10 months

1
0
0 0

[CI-NOTIFY]: TCWG Bisect tcwg_kernel/gnu-master-arm-next-allmodconfig - Build # 36 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *linux* in CI configuration tcwg_kernel/gnu-master-arm-next-allmodconfig. So far, this commit has regressed CI configurations: - tcwg_kernel/gnu-master-arm-next-allmodconfig Culprit: <cut> commit 4a2b285e7e103d4d6c6ed3e5052a0ff74a5d7f15 Author: Eric Dumazet <edumazet(a)google.com> Date: Tue Aug 10 02:45:47 2021 -0700 net: igmp: fix data-race in igmp_ifc_timer_expire() Fix the data-race reported by syzbot [1] Issue here is that igmp_ifc_timer_expire() can update in_dev->mr_ifc_count while another change just occured from another context. in_dev->mr_ifc_count is only 8bit wide, so the race had little consequences. [1] BUG: KCSAN: data-race in igmp_ifc_event / igmp_ifc_timer_expire write to 0xffff8881051e3062 of 1 bytes by task 12547 on cpu 0: igmp_ifc_event+0x1d5/0x290 net/ipv4/igmp.c:821 igmp_group_added+0x462/0x490 net/ipv4/igmp.c:1356 ____ip_mc_inc_group+0x3ff/0x500 net/ipv4/igmp.c:1461 __ip_mc_join_group+0x24d/0x2c0 net/ipv4/igmp.c:2199 ip_mc_join_group_ssm+0x20/0x30 net/ipv4/igmp.c:2218 do_ip_setsockopt net/ipv4/ip_sockglue.c:1285 [inline] ip_setsockopt+0x1827/0x2a80 net/ipv4/ip_sockglue.c:1423 tcp_setsockopt+0x8c/0xa0 net/ipv4/tcp.c:3657 sock_common_setsockopt+0x5d/0x70 net/core/sock.c:3362 __sys_setsockopt+0x18f/0x200 net/socket.c:2159 __do_sys_setsockopt net/socket.c:2170 [inline] __se_sys_setsockopt net/socket.c:2167 [inline] __x64_sys_setsockopt+0x62/0x70 net/socket.c:2167 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff8881051e3062 of 1 bytes by interrupt on cpu 1: igmp_ifc_timer_expire+0x706/0xa30 net/ipv4/igmp.c:808 call_timer_fn+0x2e/0x1d0 kernel/time/timer.c:1419 expire_timers+0x135/0x250 kernel/time/timer.c:1464 __run_timers+0x358/0x420 kernel/time/timer.c:1732 run_timer_softirq+0x19/0x30 kernel/time/timer.c:1745 __do_softirq+0x12c/0x26e kernel/softirq.c:558 invoke_softirq kernel/softirq.c:432 [inline] __irq_exit_rcu+0x9a/0xb0 kernel/softirq.c:636 sysvec_apic_timer_interrupt+0x69/0x80 arch/x86/kernel/apic/apic.c:1100 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:638 console_unlock+0x8e8/0xb30 kernel/printk/printk.c:2646 vprintk_emit+0x125/0x3d0 kernel/printk/printk.c:2174 vprintk_default+0x22/0x30 kernel/printk/printk.c:2185 vprintk+0x15a/0x170 kernel/printk/printk_safe.c:392 printk+0x62/0x87 kernel/printk/printk.c:2216 selinux_netlink_send+0x399/0x400 security/selinux/hooks.c:6041 security_netlink_send+0x42/0x90 security/security.c:2070 netlink_sendmsg+0x59e/0x7c0 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:703 [inline] sock_sendmsg net/socket.c:723 [inline] ____sys_sendmsg+0x360/0x4d0 net/socket.c:2392 ___sys_sendmsg net/socket.c:2446 [inline] __sys_sendmsg+0x1ed/0x270 net/socket.c:2475 __do_sys_sendmsg net/socket.c:2484 [inline] __se_sys_sendmsg net/socket.c:2482 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2482 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x01 -> 0x02 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 12539 Comm: syz-executor.1 Not tainted 5.14.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet <edumazet(a)google.com> Reported-by: syzbot <syzkaller(a)googlegroups.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> </cut> Results regressed to (for first_bad == 4a2b285e7e103d4d6c6ed3e5052a0ff74a5d7f15) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1: -5 # build_abe qemu: -2 # linux_n_obj: 21598 # First few build errors in logs: # 00:32:42 igmp.c:(.text+0xa734): undefined reference to `__bad_cmpxchg' # 00:32:42 make: *** [Makefile:1176: vmlinux] Error 1 from (for last_good == 37c86c4a0bfc2faaf0ed959db9de814c85797f09) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1: -5 # build_abe qemu: -2 # linux_n_obj: 29650 # linux build successful: all Artifacts of last_good build: https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-next-allmod… Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-next-allmod… Build top page/logs: https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-next-allmod… Configuration details: rr[linux_git]="https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git#9e723c5…" Reproduce builds: <cut> mkdir investigate-linux-4a2b285e7e103d4d6c6ed3e5052a0ff74a5d7f15 cd investigate-linux-4a2b285e7e103d4d6c6ed3e5052a0ff74a5d7f15 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-next-allmod… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-next-allmod… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-next-allmod… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_kernel-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /linux/ ./ ./bisect/baseline/ cd linux # Reproduce first_bad build git checkout --detach 4a2b285e7e103d4d6c6ed3e5052a0ff74a5d7f15 ../artifacts/test.sh # Reproduce last_good build git checkout --detach 37c86c4a0bfc2faaf0ed959db9de814c85797f09 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-next-allmod… Build log: https://ci.linaro.org/job/tcwg_kernel-gnu-bisect-gnu-master-arm-next-allmod… Full commit (up to 1000 lines): <cut> commit 4a2b285e7e103d4d6c6ed3e5052a0ff74a5d7f15 Author: Eric Dumazet <edumazet(a)google.com> Date: Tue Aug 10 02:45:47 2021 -0700 net: igmp: fix data-race in igmp_ifc_timer_expire() Fix the data-race reported by syzbot [1] Issue here is that igmp_ifc_timer_expire() can update in_dev->mr_ifc_count while another change just occured from another context. in_dev->mr_ifc_count is only 8bit wide, so the race had little consequences. [1] BUG: KCSAN: data-race in igmp_ifc_event / igmp_ifc_timer_expire write to 0xffff8881051e3062 of 1 bytes by task 12547 on cpu 0: igmp_ifc_event+0x1d5/0x290 net/ipv4/igmp.c:821 igmp_group_added+0x462/0x490 net/ipv4/igmp.c:1356 ____ip_mc_inc_group+0x3ff/0x500 net/ipv4/igmp.c:1461 __ip_mc_join_group+0x24d/0x2c0 net/ipv4/igmp.c:2199 ip_mc_join_group_ssm+0x20/0x30 net/ipv4/igmp.c:2218 do_ip_setsockopt net/ipv4/ip_sockglue.c:1285 [inline] ip_setsockopt+0x1827/0x2a80 net/ipv4/ip_sockglue.c:1423 tcp_setsockopt+0x8c/0xa0 net/ipv4/tcp.c:3657 sock_common_setsockopt+0x5d/0x70 net/core/sock.c:3362 __sys_setsockopt+0x18f/0x200 net/socket.c:2159 __do_sys_setsockopt net/socket.c:2170 [inline] __se_sys_setsockopt net/socket.c:2167 [inline] __x64_sys_setsockopt+0x62/0x70 net/socket.c:2167 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff8881051e3062 of 1 bytes by interrupt on cpu 1: igmp_ifc_timer_expire+0x706/0xa30 net/ipv4/igmp.c:808 call_timer_fn+0x2e/0x1d0 kernel/time/timer.c:1419 expire_timers+0x135/0x250 kernel/time/timer.c:1464 __run_timers+0x358/0x420 kernel/time/timer.c:1732 run_timer_softirq+0x19/0x30 kernel/time/timer.c:1745 __do_softirq+0x12c/0x26e kernel/softirq.c:558 invoke_softirq kernel/softirq.c:432 [inline] __irq_exit_rcu+0x9a/0xb0 kernel/softirq.c:636 sysvec_apic_timer_interrupt+0x69/0x80 arch/x86/kernel/apic/apic.c:1100 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:638 console_unlock+0x8e8/0xb30 kernel/printk/printk.c:2646 vprintk_emit+0x125/0x3d0 kernel/printk/printk.c:2174 vprintk_default+0x22/0x30 kernel/printk/printk.c:2185 vprintk+0x15a/0x170 kernel/printk/printk_safe.c:392 printk+0x62/0x87 kernel/printk/printk.c:2216 selinux_netlink_send+0x399/0x400 security/selinux/hooks.c:6041 security_netlink_send+0x42/0x90 security/security.c:2070 netlink_sendmsg+0x59e/0x7c0 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:703 [inline] sock_sendmsg net/socket.c:723 [inline] ____sys_sendmsg+0x360/0x4d0 net/socket.c:2392 ___sys_sendmsg net/socket.c:2446 [inline] __sys_sendmsg+0x1ed/0x270 net/socket.c:2475 __do_sys_sendmsg net/socket.c:2484 [inline] __se_sys_sendmsg net/socket.c:2482 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2482 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x01 -> 0x02 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 12539 Comm: syz-executor.1 Not tainted 5.14.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet <edumazet(a)google.com> Reported-by: syzbot <syzkaller(a)googlegroups.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> --- net/ipv4/igmp.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6b3c558a4f23..a51360087b19 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -803,10 +803,17 @@ static void igmp_gq_timer_expire(struct timer_list *t) static void igmp_ifc_timer_expire(struct timer_list *t) { struct in_device *in_dev = from_timer(in_dev, t, mr_ifc_timer); + u8 mr_ifc_count; igmpv3_send_cr(in_dev); - if (in_dev->mr_ifc_count) { - in_dev->mr_ifc_count--; +restart: + mr_ifc_count = READ_ONCE(in_dev->mr_ifc_count); + + if (mr_ifc_count) { + if (cmpxchg(&in_dev->mr_ifc_count, + mr_ifc_count, + mr_ifc_count - 1) != mr_ifc_count) + goto restart; igmp_ifc_start_timer(in_dev, unsolicited_report_interval(in_dev)); } @@ -818,7 +825,7 @@ static void igmp_ifc_event(struct in_device *in_dev) struct net *net = dev_net(in_dev->dev); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) return; - in_dev->mr_ifc_count = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv); igmp_ifc_start_timer(in_dev, 1); } @@ -957,7 +964,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, in_dev->mr_qri; } /* cancel the interface change timer */ - in_dev->mr_ifc_count = 0; + WRITE_ONCE(in_dev->mr_ifc_count, 0); if (del_timer(&in_dev->mr_ifc_timer)) __in_dev_put(in_dev); /* clear deleted report items */ @@ -1724,7 +1731,7 @@ void ip_mc_down(struct in_device *in_dev) igmp_group_dropped(pmc); #ifdef CONFIG_IP_MULTICAST - in_dev->mr_ifc_count = 0; + WRITE_ONCE(in_dev->mr_ifc_count, 0); if (del_timer(&in_dev->mr_ifc_timer)) __in_dev_put(in_dev); in_dev->mr_gq_running = 0; @@ -1941,7 +1948,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; - in_dev->mr_ifc_count = pmc->crcount; + WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; igmp_ifc_event(pmc->interface); @@ -2120,7 +2127,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, /* else no filters; keep old mode for reports */ pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; - in_dev->mr_ifc_count = pmc->crcount; + WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; igmp_ifc_event(in_dev); </cut>

4 years, 10 months

1
0
0 0

2026

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

linaro-toolchain