linaro-toolchain

linaro-toolchain@lists.linaro.org

3 participants
5706 discussions

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tx1/gnu-release-aarch64-spec2k6-O2 - Build # 22 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *gcc* in CI configuration tcwg_bmk_gnu_tx1/gnu-release-aarch64-spec2k6-O2. So far, this commit has regressed CI configurations: - tcwg_bmk_gnu_tx1/gnu-release-aarch64-spec2k6-O2 Culprit: <cut> commit 4c4503bf8c538bdbd8db8940aae3d7000885776e Author: Martin Liska <mliska(a)suse.cz> Date: Thu Nov 14 13:04:45 2019 +0100 Remove dead code in switch conv pass. 2019-11-14 Martin Liska <mliska(a)suse.cz> * tree-switch-conversion.c (switch_conversion::switch_conversion): Do not initialize m_other_count. (switch_conversion::collect): Do not count m_default_count and m_other_count as we use frequencies for edges. * tree-switch-conversion.h: Remove m_default_count and m_other_count. From-SVN: r278217 </cut> Results regressed to (for first_bad == 4c4503bf8c538bdbd8db8940aae3d7000885776e) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # true: 0 # benchmark -- -O2 artifacts/build-4c4503bf8c538bdbd8db8940aae3d7000885776e/results_id: 1 # 401.bzip2,bzip2_base.default regressed by 104 # 401.bzip2,[.] mainSort regressed by 116 from (for last_good == e61d0e4e2ed8170491e7dfe597340f86a75a31cd) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # true: 0 # benchmark -- -O2 artifacts/build-baseline/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… Results ID of last_good: tx1_64/tcwg_bmk_gnu_tx1/baseline-gnu-release-aarch64-spec2k6-O2/3251 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… Results ID of first_bad: tx1_64/tcwg_bmk_gnu_tx1/bisect-gnu-release-aarch64-spec2k6-O2/3279 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… Configuration details: Reproduce builds: <cut> mkdir investigate-gcc-4c4503bf8c538bdbd8db8940aae3d7000885776e cd investigate-gcc-4c4503bf8c538bdbd8db8940aae3d7000885776e git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /gcc/ ./ ./bisect/baseline/ cd gcc # Reproduce first_bad build git checkout --detach 4c4503bf8c538bdbd8db8940aae3d7000885776e ../artifacts/test.sh # Reproduce last_good build git checkout --detach e61d0e4e2ed8170491e7dfe597340f86a75a31cd ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… Full commit (up to 1000 lines): <cut> commit 4c4503bf8c538bdbd8db8940aae3d7000885776e Author: Martin Liska <mliska(a)suse.cz> Date: Thu Nov 14 13:04:45 2019 +0100 Remove dead code in switch conv pass. 2019-11-14 Martin Liska <mliska(a)suse.cz> * tree-switch-conversion.c (switch_conversion::switch_conversion): Do not initialize m_other_count. (switch_conversion::collect): Do not count m_default_count and m_other_count as we use frequencies for edges. * tree-switch-conversion.h: Remove m_default_count and m_other_count. From-SVN: r278217 --- gcc/ChangeLog | 8 ++++++++ gcc/tree-switch-conversion.c | 6 +----- gcc/tree-switch-conversion.h | 6 ------ 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6060edff243..6b77d87a97d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2019-11-14 Martin Liska <mliska(a)suse.cz> + + * tree-switch-conversion.c (switch_conversion::switch_conversion): + Do not initialize m_other_count. + (switch_conversion::collect): Do not count m_default_count and + m_other_count as we use frequencies for edges. + * tree-switch-conversion.h: Remove m_default_count and m_other_count. + 2019-11-14 Martin Liska <mliska(a)suse.cz> PR other/92329 diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c index af4fd5e33bd..e741f56b520 100644 --- a/gcc/tree-switch-conversion.c +++ b/gcc/tree-switch-conversion.c @@ -61,7 +61,7 @@ using namespace tree_switch_conversion; /* Constructor. */ -switch_conversion::switch_conversion (): m_final_bb (NULL), m_other_count (), +switch_conversion::switch_conversion (): m_final_bb (NULL), m_constructors (NULL), m_default_values (NULL), m_arr_ref_first (NULL), m_arr_ref_last (NULL), m_reason (NULL), m_default_case_nonstandard (false), m_cfg_altered (false) @@ -89,10 +89,6 @@ switch_conversion::collect (gswitch *swtch) e_default = gimple_switch_default_edge (cfun, swtch); m_default_bb = e_default->dest; m_default_prob = e_default->probability; - m_default_count = e_default->count (); - FOR_EACH_EDGE (e, ei, m_switch_bb->succs) - if (e != e_default) - m_other_count += e->count (); /* Get upper and lower bounds of case values, and the covered range. */ min_case = gimple_switch_label (swtch, 1); diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h index c58bccea7f1..a0639fc5477 100644 --- a/gcc/tree-switch-conversion.h +++ b/gcc/tree-switch-conversion.h @@ -819,12 +819,6 @@ public: /* The probability of the default edge in the replaced switch. */ profile_probability m_default_prob; - /* The count of the default edge in the replaced switch. */ - profile_count m_default_count; - - /* Combined count of all other (non-default) edges in the replaced switch. */ - profile_count m_other_count; - /* Number of phi nodes in the final bb (that we'll be replacing). */ int m_phi_count; </cut>

4 years, 9 months

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tx1/llvm-master-aarch64-spec2k6-O3 - Build # 17 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O3. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tx1/llvm-master-aarch64-spec2k6-O3 Culprit: <cut> commit 5c315bee8c9db27d12cead928eea5a3fef97f34f Author: Dawid Jurczak <dawid_jurek(a)vp.pl> Date: Mon Jul 5 11:42:17 2021 +0200 [DSE] Transform memset + malloc --> calloc (PR25892) After this change DSE can eliminate malloc + memset and emit calloc. It's https://reviews.llvm.org/D101440 follow-up. Differential Revision: https://reviews.llvm.org/D103009 </cut> Results regressed to (for first_bad == 5c315bee8c9db27d12cead928eea5a3fef97f34f) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3 artifacts/build-5c315bee8c9db27d12cead928eea5a3fef97f34f/results_id: 1 # 464.h264ref,h264ref_base.default regressed by 105 # 464.h264ref,[.] FastFullPelBlockMotionSearch regressed by 146 from (for last_good == bc5b5ea037dbadd281c59248ae9d2742b51c69ed) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O3 artifacts/build-bc5b5ea037dbadd281c59248ae9d2742b51c69ed/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Results ID of last_good: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O3/3221 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Results ID of first_bad: tx1_64/tcwg_bmk_llvm_tx1/bisect-llvm-master-aarch64-spec2k6-O3/3210 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-5c315bee8c9db27d12cead928eea5a3fef97f34f cd investigate-llvm-5c315bee8c9db27d12cead928eea5a3fef97f34f git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach 5c315bee8c9db27d12cead928eea5a3fef97f34f ../artifacts/test.sh # Reproduce last_good build git checkout --detach bc5b5ea037dbadd281c59248ae9d2742b51c69ed ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tx1-llvm-master-… Full commit (up to 1000 lines): <cut> commit 5c315bee8c9db27d12cead928eea5a3fef97f34f Author: Dawid Jurczak <dawid_jurek(a)vp.pl> Date: Mon Jul 5 11:42:17 2021 +0200 [DSE] Transform memset + malloc --> calloc (PR25892) After this change DSE can eliminate malloc + memset and emit calloc. It's https://reviews.llvm.org/D101440 follow-up. Differential Revision: https://reviews.llvm.org/D103009 --- .../lib/Transforms/Scalar/DeadStoreElimination.cpp | 81 +++++++++-- .../Transforms/DeadStoreElimination/noop-stores.ll | 153 ++++++++++++++++++++- 2 files changed, 219 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index d22b3f409585..0ada5c6e72c9 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -56,6 +56,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" @@ -78,6 +79,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <cassert> @@ -505,7 +507,12 @@ memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, BasicBlock::iterator SecondBBI(SecondI); BasicBlock *FirstBB = FirstI->getParent(); BasicBlock *SecondBB = SecondI->getParent(); - MemoryLocation MemLoc = MemoryLocation::get(SecondI); + MemoryLocation MemLoc; + if (auto *MemSet = dyn_cast<MemSetInst>(SecondI)) + MemLoc = MemoryLocation::getForDest(MemSet); + else + MemLoc = MemoryLocation::get(SecondI); + auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr); // Start checking the SecondBB. @@ -819,14 +826,17 @@ bool isNoopIntrinsic(Instruction *I) { } // Check if we can ignore \p D for DSE. -bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { +bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller, + const TargetLibraryInfo &TLI) { Instruction *DI = D->getMemoryInst(); // Calls that only access inaccessible memory cannot read or write any memory // locations we consider for elimination. if (auto *CB = dyn_cast<CallBase>(DI)) - if (CB->onlyAccessesInaccessibleMemory()) + if (CB->onlyAccessesInaccessibleMemory()) { + if (isAllocLikeFn(DI, &TLI)) + return false; return true; - + } // We can eliminate stores to locations not visible to the caller across // throwing instructions. if (DI->mayThrow() && !DefVisibleToCaller) @@ -841,7 +851,7 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { return true; // Skip intrinsics that do not really read or modify memory. - if (isNoopIntrinsic(D->getMemoryInst())) + if (isNoopIntrinsic(DI)) return true; return false; @@ -1389,7 +1399,7 @@ struct DSEState { MemoryDef *CurrentDef = cast<MemoryDef>(Current); Instruction *CurrentI = CurrentDef->getMemoryInst(); - if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO))) + if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO), TLI)) continue; // Before we try to remove anything, check for any extra throwing @@ -1816,13 +1826,58 @@ struct DSEState { if (StoredConstant && StoredConstant->isNullValue()) { auto *DefUOInst = dyn_cast<Instruction>(DefUO); - if (DefUOInst && isCallocLikeFn(DefUOInst, &TLI)) { - auto *UnderlyingDef = cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst)); - // If UnderlyingDef is the clobbering access of Def, no instructions - // between them can modify the memory location. - auto *ClobberDef = - MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def); - return UnderlyingDef == ClobberDef; + if (DefUOInst) { + if (isCallocLikeFn(DefUOInst, &TLI)) { + auto *UnderlyingDef = + cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst)); + // If UnderlyingDef is the clobbering access of Def, no instructions + // between them can modify the memory location. + auto *ClobberDef = + MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def); + return UnderlyingDef == ClobberDef; + } + + if (MemSet) { + if (F.hasFnAttribute(Attribute::SanitizeMemory) || + F.hasFnAttribute(Attribute::SanitizeAddress) || + F.hasFnAttribute(Attribute::SanitizeHWAddress) || + F.getName() == "calloc") + return false; + auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUOInst)); + if (!Malloc) + return false; + auto *InnerCallee = Malloc->getCalledFunction(); + if (!InnerCallee) + return false; + LibFunc Func; + if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) || + Func != LibFunc_malloc) + return false; + if (Malloc->getOperand(0) == MemSet->getLength()) { + if (DT.dominates(Malloc, MemSet) && + memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT)) { + IRBuilder<> IRB(Malloc); + const auto &DL = Malloc->getModule()->getDataLayout(); + AttributeList EmptyList; + if (auto *Calloc = emitCalloc( + ConstantInt::get(IRB.getIntPtrTy(DL), 1), + Malloc->getArgOperand(0), EmptyList, IRB, TLI)) { + MemorySSAUpdater Updater(&MSSA); + auto *LastDef = cast<MemoryDef>( + Updater.getMemorySSA()->getMemoryAccess(Malloc)); + auto *NewAccess = Updater.createMemoryAccessAfter( + cast<Instruction>(Calloc), LastDef, LastDef); + auto *NewAccessMD = cast<MemoryDef>(NewAccess); + Updater.insertDef(NewAccessMD, /*RenameUses=*/true); + Updater.removeMemoryAccess(Malloc); + Malloc->replaceAllUsesWith(Calloc); + Malloc->eraseFromParent(); + return true; + } + return false; + } + } + } } } diff --git a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll index 184653982a6a..12534b6047c5 100644 --- a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll +++ b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll @@ -1,9 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basic-aa -dse -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes='dse,verify<memoryssa>' -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" -declare i8* @calloc(i64, i64) declare void @memset_pattern16(i8*, i8*, i64) declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind @@ -309,6 +308,156 @@ entry: ret void } +declare noalias i8* @malloc(i64) +declare noalias i8* @_Znwm(i64) +declare void @clobber_memory(float*) + +; based on pr25892_lite +define i8* @zero_memset_after_malloc(i64 %size) { +; CHECK-LABEL: @zero_memset_after_malloc( +; CHECK-NEXT: [[CALL:%.*]] = call i8* @calloc(i64 1, i64 [[SIZE:%.*]]) +; CHECK-NEXT: ret i8* [[CALL]] +; + %call = call i8* @malloc(i64 %size) inaccessiblememonly + call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false) + ret i8* %call +} + +; based on pr25892_lite +define i8* @zero_memset_after_malloc_with_intermediate_clobbering(i64 %size) { +; CHECK-LABEL: @zero_memset_after_malloc_with_intermediate_clobbering( +; CHECK-NEXT: [[CALL:%.*]] = call i8* @malloc(i64 [[SIZE:%.*]]) +; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[CALL]] to float* +; CHECK-NEXT: call void @clobber_memory(float* [[BC]]) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 [[SIZE]], i1 false) +; CHECK-NEXT: ret i8* [[CALL]] +; + %call = call i8* @malloc(i64 %size) inaccessiblememonly + %bc = bitcast i8* %call to float* + call void @clobber_memory(float* %bc) + call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false) + ret i8* %call +} + +; based on pr25892_lite +define i8* @zero_memset_after_malloc_with_different_sizes(i64 %size) { +; CHECK-LABEL: @zero_memset_after_malloc_with_different_sizes( +; CHECK-NEXT: [[CALL:%.*]] = call i8* @malloc(i64 [[SIZE:%.*]]) +; CHECK-NEXT: [[SIZE2:%.*]] = add nsw i64 [[SIZE]], -1 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 [[SIZE2]], i1 false) +; CHECK-NEXT: ret i8* [[CALL]] +; + %call = call i8* @malloc(i64 %size) inaccessiblememonly + %size2 = add nsw i64 %size, -1 + call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size2, i1 false) + ret i8* %call +} + +; based on pr25892_lite +define i8* @zero_memset_after_new(i64 %size) { +; CHECK-LABEL: @zero_memset_after_new( +; CHECK-NEXT: [[CALL:%.*]] = call i8* @_Znwm(i64 [[SIZE:%.*]]) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 [[SIZE]], i1 false) +; CHECK-NEXT: ret i8* [[CALL]] +; + %call = call i8* @_Znwm(i64 %size) + call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false) + ret i8* %call +} + +; This should not create a calloc and should not crash the compiler. +define i8* @notmalloc_memset(i64 %size, i8*(i64)* %notmalloc) { +; CHECK-LABEL: @notmalloc_memset( +; CHECK-NEXT: [[CALL1:%.*]] = call i8* [[NOTMALLOC:%.*]](i64 [[SIZE:%.*]]) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALL1]], i8 0, i64 [[SIZE]], i1 false) +; CHECK-NEXT: ret i8* [[CALL1]] +; + %call1 = call i8* %notmalloc(i64 %size) + call void @llvm.memset.p0i8.i64(i8* %call1, i8 0, i64 %size, i1 false) + ret i8* %call1 +} + +; This should not create recursive call to calloc. +define i8* @calloc(i64 %nmemb, i64 %size) { +; CHECK-LABEL: @calloc( +; CHECK: entry: +; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SIZE:%.*]], [[NMEMB:%.*]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias align 16 i8* @malloc(i64 [[MUL]]) +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8* [[CALL]], null +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* nonnull align 16 [[CALL]], i8 0, i64 [[MUL]], i1 false) +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: ret i8* [[CALL]] +; +entry: + %mul = mul i64 %size, %nmemb + %call = tail call noalias align 16 i8* @malloc(i64 %mul) + %tobool.not = icmp eq i8* %call, null + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %call, i8 0, i64 %mul, i1 false) + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i8* %call +} + +define float* @pr25892(i64 %size) { +; CHECK-LABEL: @pr25892( +; CHECK: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i8* @calloc(i64 1, i64 [[SIZE:%.*]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[CALL]], null +; CHECK-NEXT: br i1 [[CMP]], label [[CLEANUP:%.*]], label [[IF_END:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[CALL]] to float* +; CHECK-NEXT: br label [[CLEANUP]] +; CHECK: cleanup: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi float* [ [[BC]], [[IF_END]] ], [ null, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret float* [[RETVAL_0]] +; +entry: + %call = call i8* @malloc(i64 %size) inaccessiblememonly + %cmp = icmp eq i8* %call, null + br i1 %cmp, label %cleanup, label %if.end +if.end: + %bc = bitcast i8* %call to float* + call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false) + br label %cleanup +cleanup: + %retval.0 = phi float* [ %bc, %if.end ], [ null, %entry ] + ret float* %retval.0 +} + +define float* @pr25892_with_extra_store(i64 %size) { +; CHECK-LABEL: @pr25892_with_extra_store( +; CHECK: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i8* @calloc(i64 1, i64 [[SIZE:%.*]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[CALL]], null +; CHECK-NEXT: br i1 [[CMP]], label [[CLEANUP:%.*]], label [[IF_END:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[CALL]] to float* +; CHECK-NEXT: br label [[CLEANUP]] +; CHECK: cleanup: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi float* [ [[BC]], [[IF_END]] ], [ null, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret float* [[RETVAL_0]] +; +entry: + %call = call i8* @malloc(i64 %size) inaccessiblememonly + %cmp = icmp eq i8* %call, null + br i1 %cmp, label %cleanup, label %if.end +if.end: + %bc = bitcast i8* %call to float* + call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false) + store i8 0, i8* %call, align 1 + br label %cleanup +cleanup: + %retval.0 = phi float* [ %bc, %if.end ], [ null, %entry ] + ret float* %retval.0 +} + ; PR50143 define i8* @store_zero_after_calloc_inaccessiblememonly() { ; CHECK-LABEL: @store_zero_after_calloc_inaccessiblememonly( </cut>

4 years, 9 months

[ACTIVITY] 2 - 6 August 2021

by Prathamesh Kulkarni

== This Week == * GNU-708 (Attribute to mark param as const) - Created prototype patch - Discussions on gcc mailing list * PR66791 (replace builtins in intrinsics with vector extensions) - Fixed issue with PR98435 test-case as suggested by Christophe - Pinged patches for review. == Next Week == - GNU-708, PR66791

4 years, 9 months

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tk1/gnu-master-arm-spec2k6-O3_LTO - Build # 34 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *gcc* in CI configuration tcwg_bmk_gnu_tk1/gnu-master-arm-spec2k6-O3_LTO. So far, this commit has regressed CI configurations: - tcwg_bmk_gnu_tk1/gnu-master-arm-spec2k6-O3_LTO Culprit: <cut> commit f31da42e047e8018ca6ad9809273bc7efb6ffcaf Author: Richard Biener <rguenther(a)suse.de> Date: Fri Aug 6 14:39:05 2021 +0200 tree-optimization/101801 - remove vect_worthwhile_without_simd_p This removes the cost part of vect_worthwhile_without_simd_p, retaining only the correctness bits. The reason is that the cost heuristic do not properly account for SLP plus the check whether "without simd" applies misfires for AVX512 mask vectors at the moment, leading to missed vectorizations there. Any costing decision should take place in the cost modeling, no single stmt is to disable all vectorization on its own. 2021-08-06 Richard Biener <rguenther(a)suse.de> PR tree-optimization/101801 * tree-vectorizer.h (vect_worthwhile_without_simd_p): Rename... (vect_can_vectorize_without_simd_p): ... to this. * tree-vect-loop.c (vect_worthwhile_without_simd_p): Rename... (vect_can_vectorize_without_simd_p): ... to this and fold in vect_min_worthwhile_factor. (vect_min_worthwhile_factor): Remove. (vectorizable_reduction): Adjust and remove the cost part. * tree-vect-stmts.c (vectorizable_shift): Likewise. (vectorizable_operation): Likewise. </cut> Results regressed to (for first_bad == f31da42e047e8018ca6ad9809273bc7efb6ffcaf) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # true: 0 # benchmark -- -O3_LTO_marm artifacts/build-f31da42e047e8018ca6ad9809273bc7efb6ffcaf/results_id: 1 # 482.sphinx3,sphinx_livepretend_base.default regressed by 105 from (for last_good == c2a984a3570b908a44a35e43bb48f0a05196156a) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # true: 0 # benchmark -- -O3_LTO_marm artifacts/build-c2a984a3570b908a44a35e43bb48f0a05196156a/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tk1-gnu-master-ar… Results ID of last_good: tk1_32/tcwg_bmk_gnu_tk1/bisect-gnu-master-arm-spec2k6-O3_LTO/3203 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tk1-gnu-master-ar… Results ID of first_bad: tk1_32/tcwg_bmk_gnu_tk1/bisect-gnu-master-arm-spec2k6-O3_LTO/3211 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tk1-gnu-master-ar… Configuration details: Reproduce builds: <cut> mkdir investigate-gcc-f31da42e047e8018ca6ad9809273bc7efb6ffcaf cd investigate-gcc-f31da42e047e8018ca6ad9809273bc7efb6ffcaf git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tk1-gnu-master-ar… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tk1-gnu-master-ar… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tk1-gnu-master-ar… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /gcc/ ./ ./bisect/baseline/ cd gcc # Reproduce first_bad build git checkout --detach f31da42e047e8018ca6ad9809273bc7efb6ffcaf ../artifacts/test.sh # Reproduce last_good build git checkout --detach c2a984a3570b908a44a35e43bb48f0a05196156a ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tk1-gnu-master-ar… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tk1-gnu-master-ar… Full commit (up to 1000 lines): <cut> commit f31da42e047e8018ca6ad9809273bc7efb6ffcaf Author: Richard Biener <rguenther(a)suse.de> Date: Fri Aug 6 14:39:05 2021 +0200 tree-optimization/101801 - remove vect_worthwhile_without_simd_p This removes the cost part of vect_worthwhile_without_simd_p, retaining only the correctness bits. The reason is that the cost heuristic do not properly account for SLP plus the check whether "without simd" applies misfires for AVX512 mask vectors at the moment, leading to missed vectorizations there. Any costing decision should take place in the cost modeling, no single stmt is to disable all vectorization on its own. 2021-08-06 Richard Biener <rguenther(a)suse.de> PR tree-optimization/101801 * tree-vectorizer.h (vect_worthwhile_without_simd_p): Rename... (vect_can_vectorize_without_simd_p): ... to this. * tree-vect-loop.c (vect_worthwhile_without_simd_p): Rename... (vect_can_vectorize_without_simd_p): ... to this and fold in vect_min_worthwhile_factor. (vect_min_worthwhile_factor): Remove. (vectorizable_reduction): Adjust and remove the cost part. * tree-vect-stmts.c (vectorizable_shift): Likewise. (vectorizable_operation): Likewise. --- gcc/tree-vect-loop.c | 43 +++++++------------------------------------ gcc/tree-vect-stmts.c | 26 ++------------------------ gcc/tree-vectorizer.h | 2 +- 3 files changed, 10 insertions(+), 61 deletions(-) diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 1e21fe6b13d..37c7daa7f9e 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -7227,24 +7227,13 @@ vectorizable_reduction (loop_vec_info loop_vinfo, if (dump_enabled_p ()) dump_printf (MSG_NOTE, "op not supported by target.\n"); if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD) - || !vect_worthwhile_without_simd_p (loop_vinfo, code)) + || !vect_can_vectorize_without_simd_p (code)) ok = false; else if (dump_enabled_p ()) dump_printf (MSG_NOTE, "proceeding using word mode.\n"); } - /* Worthwhile without SIMD support? */ - if (ok - && !VECTOR_MODE_P (TYPE_MODE (vectype_in)) - && !vect_worthwhile_without_simd_p (loop_vinfo, code)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not worthwhile without SIMD support.\n"); - ok = false; - } - /* lane-reducing operations have to go through vect_transform_reduction. For the other cases try without the single cycle optimization. */ if (!ok) @@ -7948,46 +7937,28 @@ vectorizable_phi (vec_info *, } -/* Function vect_min_worthwhile_factor. +/* Return true if we can emulate CODE on an integer mode representation + of a vector. */ - For a loop where we could vectorize the operation indicated by CODE, - return the minimum vectorization factor that makes it worthwhile - to use generic vectors. */ -static unsigned int -vect_min_worthwhile_factor (enum tree_code code) +bool +vect_can_vectorize_without_simd_p (tree_code code) { switch (code) { case PLUS_EXPR: case MINUS_EXPR: case NEGATE_EXPR: - return 4; - case BIT_AND_EXPR: case BIT_IOR_EXPR: case BIT_XOR_EXPR: case BIT_NOT_EXPR: - return 2; + return true; default: - return INT_MAX; + return false; } } -/* Return true if VINFO indicates we are doing loop vectorization and if - it is worth decomposing CODE operations into scalar operations for - that loop's vectorization factor. */ - -bool -vect_worthwhile_without_simd_p (vec_info *vinfo, tree_code code) -{ - loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); - unsigned HOST_WIDE_INT value; - return (loop_vinfo - && LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&value) - && value >= vect_min_worthwhile_factor (code)); -} - /* Function vectorizable_induction Check if STMT_INFO performs an induction computation that can be vectorized. diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 94bdb74ea8d..5b94d41e292 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -5685,24 +5685,13 @@ vectorizable_shift (vec_info *vinfo, /* Check only during analysis. */ if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD) || (!vec_stmt - && !vect_worthwhile_without_simd_p (vinfo, code))) + && !vect_can_vectorize_without_simd_p (code))) return false; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.\n"); } - /* Worthwhile without SIMD support? Check only during analysis. */ - if (!vec_stmt - && !VECTOR_MODE_P (TYPE_MODE (vectype)) - && !vect_worthwhile_without_simd_p (vinfo, code)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not worthwhile without SIMD support.\n"); - return false; - } - if (!vec_stmt) /* transformation not required. */ { if (slp_node @@ -6094,24 +6083,13 @@ vectorizable_operation (vec_info *vinfo, "op not supported by target.\n"); /* Check only during analysis. */ if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD) - || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code))) + || (!vec_stmt && !vect_can_vectorize_without_simd_p (code))) return false; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.\n"); } - /* Worthwhile without SIMD support? Check only during analysis. */ - if (!VECTOR_MODE_P (vec_mode) - && !vec_stmt - && !vect_worthwhile_without_simd_p (vinfo, code)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not worthwhile without SIMD support.\n"); - return false; - } - int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info); vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL); internal_fn cond_fn = get_conditional_internal_fn (code); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 5571b3cce3b..de0ecf86478 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -2061,7 +2061,7 @@ extern bool vectorizable_lc_phi (loop_vec_info, stmt_vec_info, gimple **, slp_tree); extern bool vectorizable_phi (vec_info *, stmt_vec_info, gimple **, slp_tree, stmt_vector_for_cost *); -extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code); +extern bool vect_can_vectorize_without_simd_p (tree_code); extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, stmt_vector_for_cost *, stmt_vector_for_cost *, </cut>

4 years, 9 months

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_apm/llvm-release-arm-spec2k6-Os - Build # 1 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *gcc* in CI configuration tcwg_bmk_llvm_apm/llvm-release-arm-spec2k6-Os. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_apm/llvm-release-arm-spec2k6-Os Culprit: <cut> commit b9bb6a5e12cae44a1cbf298b69f28fc6871f81c8 Author: Jakub Jelinek <jakub(a)redhat.com> Date: Tue Aug 11 16:46:49 2020 +0200 c-family: Fix ICE in get_atomic_generic_size [PR96545] As the testcase shows, we would ICE if the type of the first argument of various atomic builtins was pointer to (non-void) incomplete type, we would assume that TYPE_SIZE_UNIT must be non-NULL. This patch diagnoses it instead. And also changes the TREE_CODE != INTEGER_CST check to !tree_fits_uhwi_p, as we use tree_to_uhwi after this and at least in theory the int could be too large and not fit. 2020-08-11 Jakub Jelinek <jakub(a)redhat.com> PR c/96545 * c-common.c (get_atomic_generic_size): Require that first argument's type points to a complete type and use tree_fits_uhwi_p instead of just INTEGER_CST TREE_CODE check for the TYPE_SIZE_UNIT. * c-c++-common/pr96545.c: New test. (cherry picked from commit 7840b4dc05539cf5575b3e9ff57ff5f6c3da2cae) </cut> Results regressed to (for first_bad == b9bb6a5e12cae44a1cbf298b69f28fc6871f81c8) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=thumb --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=thumb --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -Os_mthumb artifacts/build-b9bb6a5e12cae44a1cbf298b69f28fc6871f81c8/results_id: 1 # 429.mcf,mcf_base.default regressed by 104 # 470.lbm,lbm_base.default regressed by 103 from (for last_good == db00336a49707327552e678b59da8e85384bdae6) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=thumb --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=thumb --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -Os_mthumb artifacts/build-db00336a49707327552e678b59da8e85384bdae6/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-release… Results ID of last_good: apm_32/tcwg_bmk_llvm_apm/bisect-llvm-release-arm-spec2k6-Os/3201 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-release… Results ID of first_bad: apm_32/tcwg_bmk_llvm_apm/bisect-llvm-release-arm-spec2k6-Os/3141 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-release… Configuration details: Reproduce builds: <cut> mkdir investigate-gcc-b9bb6a5e12cae44a1cbf298b69f28fc6871f81c8 cd investigate-gcc-b9bb6a5e12cae44a1cbf298b69f28fc6871f81c8 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-release… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-release… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-release… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /gcc/ ./ ./bisect/baseline/ cd gcc # Reproduce first_bad build git checkout --detach b9bb6a5e12cae44a1cbf298b69f28fc6871f81c8 ../artifacts/test.sh # Reproduce last_good build git checkout --detach db00336a49707327552e678b59da8e85384bdae6 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-release… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-release… Full commit (up to 1000 lines): <cut> commit b9bb6a5e12cae44a1cbf298b69f28fc6871f81c8 Author: Jakub Jelinek <jakub(a)redhat.com> Date: Tue Aug 11 16:46:49 2020 +0200 c-family: Fix ICE in get_atomic_generic_size [PR96545] As the testcase shows, we would ICE if the type of the first argument of various atomic builtins was pointer to (non-void) incomplete type, we would assume that TYPE_SIZE_UNIT must be non-NULL. This patch diagnoses it instead. And also changes the TREE_CODE != INTEGER_CST check to !tree_fits_uhwi_p, as we use tree_to_uhwi after this and at least in theory the int could be too large and not fit. 2020-08-11 Jakub Jelinek <jakub(a)redhat.com> PR c/96545 * c-common.c (get_atomic_generic_size): Require that first argument's type points to a complete type and use tree_fits_uhwi_p instead of just INTEGER_CST TREE_CODE check for the TYPE_SIZE_UNIT. * c-c++-common/pr96545.c: New test. (cherry picked from commit 7840b4dc05539cf5575b3e9ff57ff5f6c3da2cae) --- gcc/c-family/c-common.c | 9 ++++++++- gcc/testsuite/c-c++-common/pr96545.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c index 20258c331af..b6eb40c8122 100644 --- a/gcc/c-family/c-common.c +++ b/gcc/c-family/c-common.c @@ -6948,8 +6948,15 @@ get_atomic_generic_size (location_t loc, tree function, return 0; } + if (!COMPLETE_TYPE_P (TREE_TYPE (type_0))) + { + error_at (loc, "argument 1 of %qE must be a pointer to a complete type", + function); + return 0; + } + /* Types must be compile time constant sizes. */ - if (TREE_CODE ((TYPE_SIZE_UNIT (TREE_TYPE (type_0)))) != INTEGER_CST) + if (!tree_fits_uhwi_p ((TYPE_SIZE_UNIT (TREE_TYPE (type_0))))) { error_at (loc, "argument 1 of %qE must be a pointer to a constant size type", diff --git a/gcc/testsuite/c-c++-common/pr96545.c b/gcc/testsuite/c-c++-common/pr96545.c new file mode 100644 index 00000000000..bc6b0cf345c --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr96545.c @@ -0,0 +1,31 @@ +/* PR c/96545 */ +/* { dg-do compile } */ + +extern char x[], y[], z[]; +struct S; +extern struct S s, t, u; +int v, w; + +void +foo (void) +{ + __atomic_exchange (&x, &y, &z, 0); /* { dg-error "must be a pointer to a complete type" } */ +} + +void +bar (void) +{ + __atomic_exchange (&s, &t, &u, 0); /* { dg-error "must be a pointer to a complete type" } */ +} + +void +baz (void) +{ + __atomic_exchange (&v, &t, &w, 0); /* { dg-error "size mismatch in argument 2 of" } */ +} + +void +qux (void) +{ + __atomic_exchange (&v, &w, &t, 0); /* { dg-error "size mismatch in argument 3 of" } */ +} </cut>

4 years, 9 months

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tx1/gnu-release-aarch64-spec2k6-O2 - Build # 21 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *gcc* in CI configuration tcwg_bmk_gnu_tx1/gnu-release-aarch64-spec2k6-O2. So far, this commit has regressed CI configurations: - tcwg_bmk_gnu_tx1/gnu-release-aarch64-spec2k6-O2 Culprit: <cut> commit e61d0e4e2ed8170491e7dfe597340f86a75a31cd Author: Martin Liska <mliska(a)suse.cz> Date: Thu Nov 14 09:57:57 2019 +0100 Remove wrong lto-dump: lto1 makefile dependency. 2019-11-14 Martin Liska <mliska(a)suse.cz> * Make-lang.in: Remove wrong dependency of LTO_DUMP_EXE on LTO_EXE. From-SVN: r278212 </cut> Results regressed to (for first_bad == e61d0e4e2ed8170491e7dfe597340f86a75a31cd) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # true: 0 # benchmark -- -O2 artifacts/build-e61d0e4e2ed8170491e7dfe597340f86a75a31cd/results_id: 1 # 447.dealII,dealII_base.default regressed by 103 from (for last_good == 0840ffdf4e3568ba586371682ef485f9e5d31ae2) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # true: 0 # benchmark -- -O2 artifacts/build-0840ffdf4e3568ba586371682ef485f9e5d31ae2/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… Results ID of last_good: tx1_64/tcwg_bmk_gnu_tx1/bisect-gnu-release-aarch64-spec2k6-O2/3123 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… Results ID of first_bad: tx1_64/tcwg_bmk_gnu_tx1/bisect-gnu-release-aarch64-spec2k6-O2/3116 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… Configuration details: Reproduce builds: <cut> mkdir investigate-gcc-e61d0e4e2ed8170491e7dfe597340f86a75a31cd cd investigate-gcc-e61d0e4e2ed8170491e7dfe597340f86a75a31cd git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /gcc/ ./ ./bisect/baseline/ cd gcc # Reproduce first_bad build git checkout --detach e61d0e4e2ed8170491e7dfe597340f86a75a31cd ../artifacts/test.sh # Reproduce last_good build git checkout --detach 0840ffdf4e3568ba586371682ef485f9e5d31ae2 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_gnu-bisect-tcwg_bmk_tx1-gnu-release-a… Full commit (up to 1000 lines): <cut> commit e61d0e4e2ed8170491e7dfe597340f86a75a31cd Author: Martin Liska <mliska(a)suse.cz> Date: Thu Nov 14 09:57:57 2019 +0100 Remove wrong lto-dump: lto1 makefile dependency. 2019-11-14 Martin Liska <mliska(a)suse.cz> * Make-lang.in: Remove wrong dependency of LTO_DUMP_EXE on LTO_EXE. From-SVN: r278212 --- gcc/lto/ChangeLog | 5 +++++ gcc/lto/Make-lang.in | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/gcc/lto/ChangeLog b/gcc/lto/ChangeLog index cd035e60b9b..ba3e38aa364 100644 --- a/gcc/lto/ChangeLog +++ b/gcc/lto/ChangeLog @@ -1,3 +1,8 @@ +2019-11-14 Martin Liska <mliska(a)suse.cz> + + * Make-lang.in: Remove wrong dependency + of LTO_DUMP_EXE on LTO_EXE. + 2019-11-12 Martin Liska <mliska(a)suse.cz> * lto-common.c: Do not include params.h. diff --git a/gcc/lto/Make-lang.in b/gcc/lto/Make-lang.in index faee8899502..46df75ab59b 100644 --- a/gcc/lto/Make-lang.in +++ b/gcc/lto/Make-lang.in @@ -88,7 +88,7 @@ $(LTO_EXE): $(LTO_OBJS) $(BACKEND) $(LIBDEPS) +$(LLINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ \ $(LTO_OBJS) $(BACKEND) $(BACKENDLIBS) $(LIBS) -$(LTO_DUMP_EXE): $(LTO_EXE) $(LTO_DUMP_OBJS) $(BACKEND) $(LIBDEPS) +$(LTO_DUMP_EXE): $(LTO_DUMP_OBJS) $(BACKEND) $(LIBDEPS) +$(LLINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ \ $(LTO_DUMP_OBJS) $(BACKEND) $(BACKENDLIBS) $(LIBS) </cut>

4 years, 9 months

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_apm/llvm-master-aarch64-spec2k6-Oz - Build # 2 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *binutils* in CI configuration tcwg_bmk_llvm_apm/llvm-master-aarch64-spec2k6-Oz. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_apm/llvm-master-aarch64-spec2k6-Oz Culprit: <cut> commit b4ee29a4450a88a64d99db0e49bac2e8670e086b Author: GDB Administrator <gdbadmin(a)sourceware.org> Date: Tue Jun 22 00:00:08 2021 +0000 Automatic date update in version.in </cut> Results regressed to (for first_bad == b4ee29a4450a88a64d99db0e49bac2e8670e086b) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -Oz artifacts/build-b4ee29a4450a88a64d99db0e49bac2e8670e086b/results_id: 1 # 482.sphinx3,[.] OUTLINED_FUNCTION_4 regressed by 150 from (for last_good == 96f842cbdb37bb84fb1bab914304a3eff152ad0d) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -Oz artifacts/build-96f842cbdb37bb84fb1bab914304a3eff152ad0d/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Results ID of last_good: apm_64/tcwg_bmk_llvm_apm/bisect-llvm-master-aarch64-spec2k6-Oz/3096 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Results ID of first_bad: apm_64/tcwg_bmk_llvm_apm/bisect-llvm-master-aarch64-spec2k6-Oz/3110 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Configuration details: Reproduce builds: <cut> mkdir investigate-binutils-b4ee29a4450a88a64d99db0e49bac2e8670e086b cd investigate-binutils-b4ee29a4450a88a64d99db0e49bac2e8670e086b git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /binutils/ ./ ./bisect/baseline/ cd binutils # Reproduce first_bad build git checkout --detach b4ee29a4450a88a64d99db0e49bac2e8670e086b ../artifacts/test.sh # Reproduce last_good build git checkout --detach 96f842cbdb37bb84fb1bab914304a3eff152ad0d ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Full commit (up to 1000 lines): <cut> commit b4ee29a4450a88a64d99db0e49bac2e8670e086b Author: GDB Administrator <gdbadmin(a)sourceware.org> Date: Tue Jun 22 00:00:08 2021 +0000 Automatic date update in version.in --- bfd/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bfd/version.h b/bfd/version.h index 087c9d0e80d..6c42e8315f3 100644 --- a/bfd/version.h +++ b/bfd/version.h @@ -16,7 +16,7 @@ In releases, the date is not included in either version strings or sonames. */ -#define BFD_VERSION_DATE 20210621 +#define BFD_VERSION_DATE 20210622 #define BFD_VERSION @bfd_version@ #define BFD_VERSION_STRING @bfd_version_package@ @bfd_version_string@ #define REPORT_BUGS_TO @report_bugs_to@ </cut>

4 years, 9 months

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tk1/llvm-master-arm-spec2k6-O2_LTO - Build # 13 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *binutils* in CI configuration tcwg_bmk_llvm_tk1/llvm-master-arm-spec2k6-O2_LTO. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_tk1/llvm-master-arm-spec2k6-O2_LTO Culprit: <cut> commit 8179e388b60acc6ac35b40cd154f8d56234d1c3b Author: GDB Administrator <gdbadmin(a)sourceware.org> Date: Fri Aug 6 00:00:23 2021 +0000 Automatic date update in version.in </cut> Results regressed to (for first_bad == 8179e388b60acc6ac35b40cd154f8d56234d1c3b) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2_LTO_marm artifacts/build-8179e388b60acc6ac35b40cd154f8d56234d1c3b/results_id: 1 # 456.hmmer,hmmer_base.default regressed by 103 from (for last_good == c2bc854c8bfa24c51c902563e6b145c297d577c9) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=arm --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -O2_LTO_marm artifacts/build-c2bc854c8bfa24c51c902563e6b145c297d577c9/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-… Results ID of last_good: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-master-arm-spec2k6-O2_LTO/3089 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-… Results ID of first_bad: tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-master-arm-spec2k6-O2_LTO/3057 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-… Configuration details: Reproduce builds: <cut> mkdir investigate-binutils-8179e388b60acc6ac35b40cd154f8d56234d1c3b cd investigate-binutils-8179e388b60acc6ac35b40cd154f8d56234d1c3b git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /binutils/ ./ ./bisect/baseline/ cd binutils # Reproduce first_bad build git checkout --detach 8179e388b60acc6ac35b40cd154f8d56234d1c3b ../artifacts/test.sh # Reproduce last_good build git checkout --detach c2bc854c8bfa24c51c902563e6b145c297d577c9 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-… Full commit (up to 1000 lines): <cut> commit 8179e388b60acc6ac35b40cd154f8d56234d1c3b Author: GDB Administrator <gdbadmin(a)sourceware.org> Date: Fri Aug 6 00:00:23 2021 +0000 Automatic date update in version.in --- bfd/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bfd/version.h b/bfd/version.h index 83f1bc16ee7..ecc15ad12a6 100644 --- a/bfd/version.h +++ b/bfd/version.h @@ -16,7 +16,7 @@ In releases, the date is not included in either version strings or sonames. */ -#define BFD_VERSION_DATE 20210805 +#define BFD_VERSION_DATE 20210806 #define BFD_VERSION @bfd_version@ #define BFD_VERSION_STRING @bfd_version_package@ @bfd_version_string@ #define REPORT_BUGS_TO @report_bugs_to@ </cut>

4 years, 9 months

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_tx1/llvm-master-aarch64-spec2k6-O3 - Build # 16 - Successful!

by ci_notify＠linaro.org

4 years, 9 months

[CI-NOTIFY]: TCWG Bisect tcwg_bmk_apm/llvm-master-arm-spec2k6-Os_LTO - Build # 3 - Successful!

by ci_notify＠linaro.org

Successfully identified regression in *llvm* in CI configuration tcwg_bmk_llvm_apm/llvm-master-arm-spec2k6-Os_LTO. So far, this commit has regressed CI configurations: - tcwg_bmk_llvm_apm/llvm-master-arm-spec2k6-Os_LTO Culprit: <cut> commit 0276db14167b9348904322084e7fc1a04cc72452 Author: Jose M Monsalve Diaz <jmonsalvediaz(a)anl.gov> Date: Tue Jul 27 17:20:47 2021 -0400 [OpenMP] Creating the `omp_target_num_teams` and `omp_target_thread_limit` attributes to outlined functions The device runtime contains several calls to __kmpc_get_hardware_num_threads_in_block and __kmpc_get_hardware_num_blocks. If the thread_limit and the num_teams are constant, these calls can be folded to the constant value. In commit D106033 we have the optimization phase. This commit adds the attributes to the outlined function for the grid size. the two attributes are `omp_target_num_teams` and `omp_target_thread_limit`. These values are added as long as they are constant. Two functions are created `getNumThreadsExprForTargetDirective` and `getNumTeamsExprForTargetDirective`. The original functions `emitNumTeamsForTargetDirective` and `emitNumThreadsForTargetDirective` identify the expresion and emit the code. However, for the Device version of the outlined function, we cannot emit anything. Therefore, this is a first attempt to separate emision of code from deduction of the values. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D106298 </cut> Results regressed to (for first_bad == 0276db14167b9348904322084e7fc1a04cc72452) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=thumb --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=thumb --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -Os_LTO_mthumb artifacts/build-0276db14167b9348904322084e7fc1a04cc72452/results_id: 1 # 456.hmmer,hmmer_base.default regressed by 102 from (for last_good == c49df15c278857adecd12db6bb1cdc96885f7079) # reset_artifacts: -10 # build_abe binutils: -9 # build_abe stage1 -- --set gcc_override_configure=--with-mode=thumb --set gcc_override_configure=--disable-libsanitizer: -8 # build_abe linux: -7 # build_abe glibc: -6 # build_abe stage2 -- --set gcc_override_configure=--with-mode=thumb --set gcc_override_configure=--disable-libsanitizer: -5 # build_llvm true: -3 # true: 0 # benchmark -- -Os_LTO_mthumb artifacts/build-c49df15c278857adecd12db6bb1cdc96885f7079/results_id: 1 Artifacts of last_good build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Results ID of last_good: apm_32/tcwg_bmk_llvm_apm/bisect-llvm-master-arm-spec2k6-Os_LTO/3026 Artifacts of first_bad build: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Results ID of first_bad: apm_32/tcwg_bmk_llvm_apm/bisect-llvm-master-arm-spec2k6-Os_LTO/3024 Build top page/logs: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Configuration details: Reproduce builds: <cut> mkdir investigate-llvm-0276db14167b9348904322084e7fc1a04cc72452 cd investigate-llvm-0276db14167b9348904322084e7fc1a04cc72452 git clone https://git.linaro.org/toolchain/jenkins-scripts mkdir -p artifacts/manifests curl -o artifacts/manifests/build-baseline.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail curl -o artifacts/manifests/build-parameters.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail curl -o artifacts/test.sh https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… --fail chmod +x artifacts/test.sh # Reproduce the baseline build (build all pre-requisites) ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh # Save baseline build state (which is then restored in artifacts/test.sh) mkdir -p ./bisect rsync -a --del --delete-excluded --exclude /bisect/ --exclude /artifacts/ --exclude /llvm/ ./ ./bisect/baseline/ cd llvm # Reproduce first_bad build git checkout --detach 0276db14167b9348904322084e7fc1a04cc72452 ../artifacts/test.sh # Reproduce last_good build git checkout --detach c49df15c278857adecd12db6bb1cdc96885f7079 ../artifacts/test.sh cd .. </cut> History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/… Artifacts: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Build log: https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_apm-llvm-master-… Full commit (up to 1000 lines): <cut> commit 0276db14167b9348904322084e7fc1a04cc72452 Author: Jose M Monsalve Diaz <jmonsalvediaz(a)anl.gov> Date: Tue Jul 27 17:20:47 2021 -0400 [OpenMP] Creating the `omp_target_num_teams` and `omp_target_thread_limit` attributes to outlined functions The device runtime contains several calls to __kmpc_get_hardware_num_threads_in_block and __kmpc_get_hardware_num_blocks. If the thread_limit and the num_teams are constant, these calls can be folded to the constant value. In commit D106033 we have the optimization phase. This commit adds the attributes to the outlined function for the grid size. the two attributes are `omp_target_num_teams` and `omp_target_thread_limit`. These values are added as long as they are constant. Two functions are created `getNumThreadsExprForTargetDirective` and `getNumTeamsExprForTargetDirective`. The original functions `emitNumTeamsForTargetDirective` and `emitNumThreadsForTargetDirective` identify the expresion and emit the code. However, for the Device version of the outlined function, we cannot emit anything. Therefore, this is a first attempt to separate emision of code from deduction of the values. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D106298 --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 252 +++++++-- clang/lib/CodeGen/CGOpenMPRuntime.h | 29 ++ .../declare_target_codegen_globalization.cpp | 12 +- clang/test/OpenMP/nvptx_lambda_capturing.cpp | 24 +- .../OpenMP/nvptx_multi_target_parallel_codegen.cpp | 54 +- clang/test/OpenMP/nvptx_target_codegen.cpp | 96 ++-- .../test/OpenMP/nvptx_target_parallel_codegen.cpp | 24 +- .../nvptx_target_parallel_num_threads_codegen.cpp | 24 +- ...arget_teams_distribute_parallel_for_codegen.cpp | 152 +++--- ..._teams_distribute_parallel_for_simd_codegen.cpp | 78 +-- clang/test/OpenMP/target_map_codegen_03.cpp | 73 ++- .../target_num_teams_num_threads_attributes.cpp | 175 +++++++ clang/test/OpenMP/target_parallel_codegen.cpp | 512 +++++++++--------- .../test/OpenMP/target_parallel_debug_codegen.cpp | 6 +- clang/test/OpenMP/target_parallel_for_codegen.cpp | 576 ++++++++++----------- .../OpenMP/target_parallel_for_debug_codegen.cpp | 6 +- .../OpenMP/target_parallel_for_simd_codegen.cpp | 560 ++++++++++---------- clang/test/OpenMP/target_parallel_if_codegen.cpp | 481 +++++++++-------- .../OpenMP/target_parallel_num_threads_codegen.cpp | 305 ++++++----- .../target_teams_distribute_simd_codegen.cpp | 560 ++++++++++---------- ...rget_teams_distribute_simd_collapse_codegen.cpp | 408 +++++++-------- ...teams_distribute_simd_dist_schedule_codegen.cpp | 168 +++--- ..._teams_distribute_simd_firstprivate_codegen.cpp | 316 +++++------ ...t_teams_distribute_simd_lastprivate_codegen.cpp | 496 +++++++++--------- ...arget_teams_distribute_simd_private_codegen.cpp | 316 +++++------ ...get_teams_distribute_simd_reduction_codegen.cpp | 284 +++++----- .../test/OpenMP/target_teams_num_teams_codegen.cpp | 80 +-- .../OpenMP/target_teams_thread_limit_codegen.cpp | 112 ++-- clang/test/OpenMP/teams_codegen.cpp | 16 +- 29 files changed, 3279 insertions(+), 2916 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index e7aa84ef3d90..c09797e91b99 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6551,6 +6551,20 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( OffloadEntriesInfoManager.registerTargetRegionEntryInfo( DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); + + // Add NumTeams and ThreadLimit attributes to the outlined GPU function + int32_t DefaultValTeams = -1; + getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); + if (DefaultValTeams > 0) { + OutlinedFn->addFnAttr("omp_target_num_teams", + std::to_string(DefaultValTeams)); + } + int32_t DefaultValThreads = -1; + getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); + if (DefaultValThreads > 0) { + OutlinedFn->addFnAttr("omp_target_thread_limit", + std::to_string(DefaultValThreads)); + } } /// Checks if the expression is constant or does not have non-trivial function @@ -6605,24 +6619,13 @@ const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, return Child; } -/// Emit the number of teams for a target directive. Inspect the num_teams -/// clause associated with a teams construct combined or closely nested -/// with the target directive. -/// -/// Emit a team of size one for directives such as 'target parallel' that -/// have no associated teams construct. -/// -/// Otherwise, return nullptr. -static llvm::Value * -emitNumTeamsForTargetDirective(CodeGenFunction &CGF, - const OMPExecutableDirective &D) { - assert(!CGF.getLangOpts().OpenMPIsDevice && - "Clauses associated with the teams directive expected to be emitted " - "only for the host!"); +const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + int32_t &DefaultVal) { + OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); assert(isOpenMPTargetExecutionDirective(DirectiveKind) && "Expected target-based executable directive."); - CGBuilderTy &Bld = CGF.Builder; switch (DirectiveKind) { case OMPD_target: { const auto *CS = D.getInnermostCapturedStmt(); @@ -6634,23 +6637,27 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { - CGOpenMPInnerExprInfo CGInfo(CGF, *CS); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); - llvm::Value *NumTeamsVal = - CGF.EmitScalarExpr(NumTeams, - /*IgnoreResultAssign*/ true); - return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, - /*isSigned=*/true); + if (NumTeams->isIntegerConstantExpr(CGF.getContext())) + if (auto Constant = + NumTeams->getIntegerConstantExpr(CGF.getContext())) + DefaultVal = Constant->getExtValue(); + return NumTeams; } - return Bld.getInt32(0); + DefaultVal = 0; + return nullptr; } if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || - isOpenMPSimdDirective(NestedDir->getDirectiveKind())) - return Bld.getInt32(1); - return Bld.getInt32(0); + isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { + DefaultVal = 1; + return nullptr; + } + DefaultVal = 1; + return nullptr; } + // A value of -1 is used to check if we need to emit no teams region + DefaultVal = -1; return nullptr; } case OMPD_target_teams: @@ -6659,22 +6666,22 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for_simd: { if (D.hasClausesOfKind<OMPNumTeamsClause>()) { - CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); const Expr *NumTeams = D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); - llvm::Value *NumTeamsVal = - CGF.EmitScalarExpr(NumTeams, - /*IgnoreResultAssign*/ true); - return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, - /*isSigned=*/true); + if (NumTeams->isIntegerConstantExpr(CGF.getContext())) + if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) + DefaultVal = Constant->getExtValue(); + return NumTeams; } - return Bld.getInt32(0); + DefaultVal = 0; + return nullptr; } case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: case OMPD_target_simd: - return Bld.getInt32(1); + DefaultVal = 1; + return nullptr; case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: @@ -6740,6 +6747,48 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, llvm_unreachable("Unexpected directive kind."); } +llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( + CodeGenFunction &CGF, const OMPExecutableDirective &D) { + assert(!CGF.getLangOpts().OpenMPIsDevice && + "Clauses associated with the teams directive expected to be emitted " + "only for the host!"); + CGBuilderTy &Bld = CGF.Builder; + int32_t DefaultNT = -1; + const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT); + if (NumTeams != nullptr) { + OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); + + switch (DirectiveKind) { + case OMPD_target: { + const auto *CS = D.getInnermostCapturedStmt(); + CGOpenMPInnerExprInfo CGInfo(CGF, *CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, + /*IgnoreResultAssign*/ true); + return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, + /*isSigned=*/true); + } + case OMPD_target_teams: + case OMPD_target_teams_distribute: + case OMPD_target_teams_distribute_simd: + case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: { + CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); + llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, + /*IgnoreResultAssign*/ true); + return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, + /*isSigned=*/true); + } + default: + break; + } + } else if (DefaultNT == -1) { + return nullptr; + } + + return Bld.getInt32(DefaultNT); +} + static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, llvm::Value *DefaultThreadLimitVal) { const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( @@ -6832,17 +6881,130 @@ static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, : CGF.Builder.getInt32(0); } -/// Emit the number of threads for a target directive. Inspect the -/// thread_limit clause associated with a teams construct combined or closely -/// nested with the target directive. -/// -/// Emit the num_threads clause for directives such as 'target parallel' that -/// have no associated teams construct. -/// -/// Otherwise, return nullptr. -static llvm::Value * -emitNumThreadsForTargetDirective(CodeGenFunction &CGF, - const OMPExecutableDirective &D) { +const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + int32_t &DefaultVal) { + OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); + assert(isOpenMPTargetExecutionDirective(DirectiveKind) && + "Expected target-based executable directive."); + + switch (DirectiveKind) { + case OMPD_target: + // Teams have no clause thread_limit + return nullptr; + case OMPD_target_teams: + case OMPD_target_teams_distribute: + if (D.hasClausesOfKind<OMPThreadLimitClause>()) { + const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); + const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit(); + if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) + if (auto Constant = + ThreadLimit->getIntegerConstantExpr(CGF.getContext())) + DefaultVal = Constant->getExtValue(); + return ThreadLimit; + } + return nullptr; + case OMPD_target_parallel: + case OMPD_target_parallel_for: + case OMPD_target_parallel_for_simd: + case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: { + Expr *ThreadLimit = nullptr; + Expr *NumThreads = nullptr; + if (D.hasClausesOfKind<OMPThreadLimitClause>()) { + const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); + ThreadLimit = ThreadLimitClause->getThreadLimit(); + if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) + if (auto Constant = + ThreadLimit->getIntegerConstantExpr(CGF.getContext())) + DefaultVal = Constant->getExtValue(); + } + if (D.hasClausesOfKind<OMPNumThreadsClause>()) { + const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); + NumThreads = NumThreadsClause->getNumThreads(); + if (NumThreads->isIntegerConstantExpr(CGF.getContext())) { + if (auto Constant = + NumThreads->getIntegerConstantExpr(CGF.getContext())) { + if (Constant->getExtValue() < DefaultVal) { + DefaultVal = Constant->getExtValue(); + ThreadLimit = NumThreads; + } + } + } + } + return ThreadLimit; + } + case OMPD_target_teams_distribute_simd: + case OMPD_target_simd: + DefaultVal = 1; + return nullptr; + case OMPD_parallel: + case OMPD_for: + case OMPD_parallel_for: + case OMPD_parallel_master: + case OMPD_parallel_sections: + case OMPD_for_simd: + case OMPD_parallel_for_simd: + case OMPD_cancel: + case OMPD_cancellation_point: + case OMPD_ordered: + case OMPD_threadprivate: + case OMPD_allocate: + case OMPD_task: + case OMPD_simd: + case OMPD_tile: + case OMPD_unroll: + case OMPD_sections: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_taskgroup: + case OMPD_atomic: + case OMPD_flush: + case OMPD_depobj: + case OMPD_scan: + case OMPD_teams: + case OMPD_target_data: + case OMPD_target_exit_data: + case OMPD_target_enter_data: + case OMPD_distribute: + case OMPD_distribute_simd: + case OMPD_distribute_parallel_for: + case OMPD_distribute_parallel_for_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + case OMPD_teams_distribute_parallel_for: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_target_update: + case OMPD_declare_simd: + case OMPD_declare_variant: + case OMPD_begin_declare_variant: + case OMPD_end_declare_variant: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_declare_reduction: + case OMPD_declare_mapper: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: + case OMPD_requires: + case OMPD_unknown: + break; + default: + break; + } + llvm_unreachable("Unsupported directive kind."); +} + +llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( + CodeGenFunction &CGF, const OMPExecutableDirective &D) { assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the teams directive expected to be emitted " "only for the host!"); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 491ef9b7aaf3..c24648aae7e1 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -340,6 +340,35 @@ protected: llvm::Value *emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags = 0); + /// Emit the number of teams for a target directive. Inspect the num_teams + /// clause associated with a teams construct combined or closely nested + /// with the target directive. + /// + /// Emit a team of size one for directives such as 'target parallel' that + /// have no associated teams construct. + /// + /// Otherwise, return nullptr. + const Expr *getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + int32_t &DefaultVal); + llvm::Value *emitNumTeamsForTargetDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &D); + /// Emit the number of threads for a target directive. Inspect the + /// thread_limit clause associated with a teams construct combined or closely + /// nested with the target directive. + /// + /// Emit the num_threads clause for directives such as 'target parallel' that + /// have no associated teams construct. + /// + /// Otherwise, return nullptr. + const Expr * + getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + int32_t &DefaultVal); + llvm::Value * + emitNumThreadsForTargetDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &D); + /// Returns pointer to ident_t type. llvm::Type *getIdentTyPointerTy(); diff --git a/clang/test/OpenMP/declare_target_codegen_globalization.cpp b/clang/test/OpenMP/declare_target_codegen_globalization.cpp index f8e2a33ca959..47610f7b84a8 100644 --- a/clang/test/OpenMP/declare_target_codegen_globalization.cpp +++ b/clang/test/OpenMP/declare_target_codegen_globalization.cpp @@ -48,7 +48,7 @@ int maini1() { // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -58,15 +58,15 @@ int maini1() { // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooRi(i32* nonnull align 4 dereferenceable(4) [[B]]) #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: [[CALL1:%.*]] = call i32 @_Z3barv() #[[ATTR3]] +// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooRi(i32* nonnull align 4 dereferenceable(4) [[B]]) #[[ATTR4:[0-9]+]] +// CHECK1-NEXT: [[CALL1:%.*]] = call i32 @_Z3barv() #[[ATTR4]] // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_Z3fooRi -// CHECK1-SAME: (i32* nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -76,11 +76,11 @@ int maini1() { // // // CHECK1-LABEL: define {{[^@]+}}@_Z3barv -// CHECK1-SAME: () #[[ATTR1]] { +// CHECK1-SAME: () #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A]] to i32* -// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooRi(i32* nonnull align 4 dereferenceable(4) [[A_ON_STACK]]) #[[ATTR3]] +// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooRi(i32* nonnull align 4 dereferenceable(4) [[A_ON_STACK]]) #[[ATTR4]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[A]], i64 4) // CHECK1-NEXT: ret i32 [[CALL]] // diff --git a/clang/test/OpenMP/nvptx_lambda_capturing.cpp b/clang/test/OpenMP/nvptx_lambda_capturing.cpp index 6fe58725a623..b19806df1aee 100644 --- a/clang/test/OpenMP/nvptx_lambda_capturing.cpp +++ b/clang/test/OpenMP/nvptx_lambda_capturing.cpp @@ -397,7 +397,7 @@ int main(int argc, char **argv) { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43 -// CHECK1-SAME: (i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i32* [[D:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], %class.anon* nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i32* [[D:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], %class.anon* nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 @@ -630,7 +630,7 @@ int main(int argc, char **argv) { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29 -// CHECK1-SAME: (%struct.S* [[THIS:%.*]], %class.anon.0* nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (%struct.S* [[THIS:%.*]], %class.anon.0* nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[L_ADDR:%.*]] = alloca %class.anon.0*, align 8 @@ -715,7 +715,7 @@ int main(int argc, char **argv) { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18 -// CHECK1-SAME: (%class.anon.0* nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (%class.anon.0* nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_ADDR:%.*]] = alloca %class.anon.0*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca %class.anon.0*, align 8 @@ -805,7 +805,7 @@ int main(int argc, char **argv) { // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29 -// CHECK2-SAME: (%struct.S* [[THIS:%.*]], %class.anon* nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR0]] { +// CHECK2-SAME: (%struct.S* [[THIS:%.*]], %class.anon* nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK2-NEXT: [[L_ADDR:%.*]] = alloca %class.anon*, align 8 @@ -937,7 +937,7 @@ int main(int argc, char **argv) { // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43 -// CHECK2-SAME: (i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i32* [[D:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], %class.anon.0* nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR0]] { +// CHECK2-SAME: (i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i32* [[D:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], %class.anon.0* nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 @@ -1072,7 +1072,7 @@ int main(int argc, char **argv) { // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18 -// CHECK2-SAME: (%class.anon* nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR0]] { +// CHECK2-SAME: (%class.anon* nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[T_ADDR:%.*]] = alloca %class.anon*, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca %class.anon*, align 8 @@ -1193,7 +1193,7 @@ int main(int argc, char **argv) { // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43 -// CHECK3-SAME: (i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i32* [[D:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], %class.anon* nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR0]] { +// CHECK3-SAME: (i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i32* [[D:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], %class.anon* nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 @@ -1374,7 +1374,7 @@ int main(int argc, char **argv) { // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29 -// CHECK3-SAME: (%struct.S* [[THIS:%.*]], %class.anon.0* nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR0]] { +// CHECK3-SAME: (%struct.S* [[THIS:%.*]], %class.anon.0* nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK3-NEXT: [[L_ADDR:%.*]] = alloca %class.anon.0*, align 8 @@ -1436,7 +1436,7 @@ int main(int argc, char **argv) { // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18 -// CHECK3-SAME: (%class.anon.0* nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR0]] { +// CHECK3-SAME: (%class.anon.0* nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_ADDR:%.*]] = alloca %class.anon.0*, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca %class.anon.0*, align 8 @@ -1557,7 +1557,7 @@ int main(int argc, char **argv) { // // // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43 -// CHECK4-SAME: (i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i32* [[D:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], %class.anon* nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR0]] { +// CHECK4-SAME: (i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i32* [[D:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], %class.anon* nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 @@ -1738,7 +1738,7 @@ int main(int argc, char **argv) { // // // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29 -// CHECK4-SAME: (%struct.S* [[THIS:%.*]], %class.anon.0* nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR0]] { +// CHECK4-SAME: (%struct.S* [[THIS:%.*]], %class.anon.0* nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK4-NEXT: [[L_ADDR:%.*]] = alloca %class.anon.0*, align 8 @@ -1800,7 +1800,7 @@ int main(int argc, char **argv) { // // // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18 -// CHECK4-SAME: (%class.anon.0* nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR0]] { +// CHECK4-SAME: (%class.anon.0* nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[T_ADDR:%.*]] = alloca %class.anon.0*, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca %class.anon.0*, align 8 diff --git a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp index e04e83527afc..232a2a33e903 100644 --- a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp @@ -43,18 +43,18 @@ int main() { // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @_Z3usev() #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void @_Z3usev() #[[ATTR6:[0-9]+]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_Z3usev -// CHECK1-SAME: () #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) @@ -64,13 +64,13 @@ int main() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 -// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: call void @_Z3usev() #[[ATTR5]] +// CHECK1-NEXT: call void @_Z3usev() #[[ATTR6]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -78,18 +78,18 @@ int main() { // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @_Z4workv() #[[ATTR5]] +// CHECK1-NEXT: call void @_Z4workv() #[[ATTR6]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -99,7 +99,7 @@ int main() { // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: ret void // // @@ -121,18 +121,18 @@ int main() { // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: call void @_Z3usev() #[[ATTR5:[0-9]+]] +// CHECK2-NEXT: call void @_Z3usev() #[[ATTR6:[0-9]+]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@_Z3usev -// CHECK2-SAME: () #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: () #[[ATTR2:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) @@ -142,13 +142,13 @@ int main() { // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 -// CHECK2-SAME: () #[[ATTR0]] { +// CHECK2-SAME: () #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: call void @_Z3usev() #[[ATTR5]] +// CHECK2-NEXT: call void @_Z3usev() #[[ATTR6]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -156,18 +156,18 @@ int main() { // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: call void @_Z4workv() #[[ATTR5]] +// CHECK2-NEXT: call void @_Z4workv() #[[ATTR6]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -177,7 +177,7 @@ int main() { // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: ret void // // @@ -199,18 +199,18 @@ int main() { // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: call void @_Z3usev() #[[ATTR5:[0-9]+]] +// CHECK3-NEXT: call void @_Z3usev() #[[ATTR6:[0-9]+]] // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_Z3usev -// CHECK3-SAME: () #[[ATTR1:[0-9]+]] { +// CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) @@ -220,13 +220,13 @@ int main() { // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 -// CHECK3-SAME: () #[[ATTR0]] { +// CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: call void @_Z3usev() #[[ATTR5]] +// CHECK3-NEXT: call void @_Z3usev() #[[ATTR6]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -234,18 +234,18 @@ int main() { // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: call void @_Z4workv() #[[ATTR5]] +// CHECK3-NEXT: call void @_Z4workv() #[[ATTR6]] // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -255,6 +255,6 @@ int main() { // CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2:[0-9]+]] +// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp index c1a3b5b699e0..5bf54060b85a 100644 --- a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -173,7 +173,7 @@ void unreachable_call() { // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32** nonnull align 8 dereferenceable(8) [[PTR1:%.*]], i32** nonnull align 8 dereferenceable(8) [[PTR2:%.*]]) #[[ATTR0]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32** nonnull align 8 dereferenceable(8) [[PTR1:%.*]], i32** nonnull align 8 dereferenceable(8) [[PTR2:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -193,7 +193,7 @@ void unreachable_call() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 -// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -206,7 +206,7 @@ void unreachable_call() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47 -// CHECK1-SAME: (i64 [[AA:%.*]]) #[[ATTR0]] { +// CHECK1-SAME: (i64 [[AA:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 @@ -232,7 +232,7 @@ void unreachable_call() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53 -// CHECK1-SAME: (i64 [[A:%.*]], [10 x float]* nonnull align 4 dereferenceable(40) [[B:%.*]], i64 [[VLA:%.*]], float* nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* nonnull align 8 dereferenceable(400) [[C:%.*]], i64 [[VLA1:%.*]], i64 [[VLA3:%.*]], double* nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] { +// CHECK1-SAME: (i64 [[A:%.*]], [10 x float]* nonnull align 4 dereferenceable(40) [[B:%.*]], i64 [[VLA:%.*]], float* nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* nonnull align 8 dereferenceable(400) [[C:%.*]], i64 [[VLA1:%.*]], i64 [[VLA3:%.*]], double* nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 @@ -301,7 +301,7 @@ void unreachable_call() { // CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[CONV19]], 1 // CHECK1-NEXT: [[CONV21:%.*]] = trunc i32 [[ADD20]] to i8 // CHECK1-NEXT: store i8 [[CONV21]], i8* [[Y]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(8) i64* @_ZN2TTIxcEixEi(%struct.TT* nonnull align 8 dereferenceable(16) [[TMP7]], i32 0) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(8) i64* @_ZN2TTIxcEixEi(%struct.TT* nonnull align 8 dereferenceable(16) [[TMP7]], i32 0) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[CALL]], align 8 // CHECK1-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP17]], 1 // CHECK1-NEXT: store i64 [[ADD22]], i64* [[CALL]], align 8 @@ -312,7 +312,7 @@ void unreachable_call() { // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2TTIxcEixEi -// CHECK1-SAME: (%struct.TT* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR2:[0-9]+]] comdat align 2 { +// CHECK1-SAME: (%struct.TT* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR3:[0-9]+]] comdat align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.TT*, align 8 // CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 @@ -324,7 +324,7 @@ void unreachable_call() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90 -// CHECK1-SAME: (i64 [[A:%.*]], i64 [[AA:%.*]], i64 [[AAA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK1-SAME: (i64 [[A:%.*]], i64 [[AA:%.*]], i64 [[AAA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 @@ -366,7 +366,7 @@ void unreachable_call() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108 -// CHECK1-SAME: (%struct.S1* [[THIS:%.*]], i64 [[B:%.*]], i64 [[VLA:%.*]], i64 [[VLA1:%.*]], i16* nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// CHECK1-SAME: (%struct.S1* [[THIS:%.*]], i64 [[B:%.*]], i64 [[VLA:%.*]], i64 [[VLA1:%.*]], i16* nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 @@ -405,7 +405,7 @@ void unreachable_call() { // CHECK1-NEXT: [[TMP8:%.*]] = load double, double* [[A7]], align 8 // CHECK1-NEXT: [[CONV8:%.*]] = fptosi double [[TMP8]] to i32 // CHECK1-NEXT: [[A9:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV8]], double* nonnull align 8 dereferenceable(8) [[A9]]) #[[ATTR5]] +// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV8]], double* nonnull align 8 dereferenceable(8) [[A9]]) #[[ATTR6]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -413,7 +413,7 @@ void unreachable_call() { // // // CHECK1-LABEL: define {{[^@]+}}@_Z3baziRd -// CHECK1-SAME: (i32 [[F1:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32 [[F1:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 @@ -437,13 +437,13 @@ void unreachable_call() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142 -// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: call void @_Z6asserti(i32 0) #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: call void @_Z6asserti(i32 0) #[[ATTR7:[0-9]+]] // CHECK1-NEXT: unreachable // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -453,7 +453,7 @@ void unreachable_call() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 -// CHECK1-SAME: (i64 [[A:%.*]], i64 [[AA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK1-SAME: (i64 [[A:%.*]], i64 [[AA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 @@ -487,7 +487,7 @@ void unreachable_call() { // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[F:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR0]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[F:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -510,7 +510,7 @@ void unreachable_call() { // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -527,7 +527,7 @@ void unreachable_call() { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 // CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to double** // CHECK1-NEXT: [[TMP8:%.*]] = load double*, double** [[TMP7]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], double* [[TMP8]]) #[[ATTR1:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], double* [[TMP8]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: ret void // // @@ -560,7 +560,7 @@ void unreachable_call() { // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32** nonnull align 4 dereferenceable(4) [[PTR1:%.*]], i32** nonnull align 4 dereferenceable(4) [[PTR2:%.*]]) #[[ATTR0]] { +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32** nonnull align 4 dereferenceable(4) [[PTR1:%.*]], i32** nonnull align 4 dereferenceable(4) [[PTR2:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 @@ -580,7 +580,7 @@ void unreachable_call() { // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 -// CHECK2-SAME: () #[[ATTR0]] { +// CHECK2-SAME: () #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -593,7 +593,7 @@ void unreachable_call() { // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47 -// CHECK2-SAME: (i32 [[AA:%.*]]) #[[ATTR0]] { +// CHECK2-SAME: (i32 [[AA:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 @@ -619,7 +619,7 @@ void unreachable_call() { // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53 -// CHECK2-SAME: (i32 [[A:%.*]], [10 x float]* nonnull align 4 dereferenceable(40) [[B:%.*]], i32 [[VLA:%.*]], float* nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* nonnull align 8 dereferenceable(400) [[C:%.*]], i32 [[VLA1:%.*]], i32 [[VLA3:%.*]], double* nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] { +// CHECK2-SAME: (i32 [[A:%.*]], [10 x float]* nonnull align 4 dereferenceable(40) [[B:%.*]], i32 [[VLA:%.*]], float* nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* nonnull align 8 dereferenceable(400) [[C:%.*]], i32 [[VLA1:%.*]], i32 [[VLA3:%.*]], double* nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 @@ -687,7 +687,7 @@ void unreachable_call() { // CHECK2-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 // CHECK2-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 // CHECK2-NEXT: store i8 [[CONV20]], i8* [[Y]], align 8 -// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(8) i64* @_ZN2TTIxcEixEi(%struct.TT* nonnull align 8 dereferenceable(16) [[TMP7]], i32 0) #[[ATTR5:[0-9]+]] +// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(8) i64* @_ZN2TTIxcEixEi(%struct.TT* nonnull align 8 dereferenceable(16) [[TMP7]], i32 0) #[[ATTR6:[0-9]+]] // CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[CALL]], align 8 // CHECK2-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP17]], 1 // CHECK2-NEXT: store i64 [[ADD21]], i64* [[CALL]], align 8 @@ -698,7 +698,7 @@ void unreachable_call() { // // // CHECK2-LABEL: define {{[^@]+}}@_ZN2TTIxcEixEi -// CHECK2-SAME: (%struct.TT* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR2:[0-9]+]] comdat align 2 { +// CHECK2-SAME: (%struct.TT* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR3:[0-9]+]] comdat align 2 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.TT*, align 4 // CHECK2-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 @@ -710,7 +710,7 @@ void unreachable_call() { // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90 -// CHECK2-SAME: (i32 [[A:%.*]], i32 [[AA:%.*]], i32 [[AAA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK2-SAME: (i32 [[A:%.*]], i32 [[AA:%.*]], i32 [[AAA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 @@ -751,7 +751,7 @@ void unreachable_call() { // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108 -// CHECK2-SAME: (%struct.S1* [[THIS:%.*]], i32 [[B:%.*]], i32 [[VLA:%.*]], i32 [[VLA1:%.*]], i16* nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// CHECK2-SAME: (%struct.S1* [[THIS:%.*]], i32 [[B:%.*]], i32 [[VLA:%.*]], i32 [[VLA1:%.*]], i16* nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 @@ -789,7 +789,7 @@ void unreachable_call() { // CHECK2-NEXT: [[TMP8:%.*]] = load double, double* [[A6]], align 8 // CHECK2-NEXT: [[CONV7:%.*]] = fptosi double [[TMP8]] to i32 </cut>

4 years, 9 months

Jump to page:

2026

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

linaro-toolchain